<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v27i1e63550</article-id>
      <article-id pub-id-type="pmid">39919289</article-id>
      <article-id pub-id-type="doi">10.2196/63550</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>ChatGPT for Univariate Statistics: Validation of AI-Assisted Data Analysis in Healthcare Research</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Jin</surname>
            <given-names>Qiao</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Huang</surname>
            <given-names>Yeen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Piccolo</surname>
            <given-names>Stephen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Borges</surname>
            <given-names>Endler Marcel</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>Fangyuan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Ruta</surname>
            <given-names>Michael R</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>University of Arizona College of Medicine – Phoenix</institution>
            <addr-line>475 N 5th St</addr-line>
            <addr-line>Phoenix, AZ, 85004</addr-line>
            <country>United States</country>
            <phone>1 602 827 2002</phone>
            <email>mruta@arizona.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-1246-3360</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Gaidici</surname>
            <given-names>Tony</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-8175-0713</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Irwin</surname>
            <given-names>Chase</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3165-0693</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Lifshitz</surname>
            <given-names>Jonathan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4398-6493</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>University of Arizona College of Medicine – Phoenix</institution>
        <addr-line>Phoenix, AZ</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Michael R Ruta <email>mruta@arizona.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>7</day>
        <month>2</month>
        <year>2025</year>
      </pub-date>
      <volume>27</volume>
      <elocation-id>e63550</elocation-id>
      <history>
        <date date-type="received">
          <day>23</day>
          <month>6</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>31</day>
          <month>8</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>19</day>
          <month>10</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>24</day>
          <month>11</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Michael R Ruta, Tony Gaidici, Chase Irwin, Jonathan Lifshitz. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 07.02.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2025/1/e63550" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>ChatGPT, a conversational artificial intelligence developed by OpenAI, has rapidly become an invaluable tool for researchers. With the recent integration of Python code interpretation into the ChatGPT environment, there has been a significant increase in the potential utility of ChatGPT as a research tool, particularly in terms of data analysis applications.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to assess ChatGPT as a data analysis tool and provide researchers with a framework for applying ChatGPT to data management tasks, descriptive statistics, and inferential statistics.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A subset of the National Inpatient Sample was extracted. Data analysis trials were divided into data processing, categorization, and tabulation, as well as descriptive and inferential statistics. For data processing, categorization, and tabulation assessments, ChatGPT was prompted to reclassify variables, subset variables, and present data, respectively. Descriptive statistics assessments included mean, SD, median, and IQR calculations. Inferential statistics assessments were conducted at varying levels of prompt specificity (“Basic,” “Intermediate,” and “Advanced”). Specific tests included chi-square, Pearson correlation, independent 2-sample <italic>t</italic> test, 1-way ANOVA, Fisher exact, Spearman correlation, Mann-Whitney <italic>U</italic> test, and Kruskal-Wallis <italic>H</italic> test. Outcomes from consecutive prompt-based trials were assessed against expected statistical values calculated in Python (Python Software Foundation), SAS (SAS Institute), and RStudio (Posit PBC).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>ChatGPT accurately performed data processing, categorization, and tabulation across all trials. For descriptive statistics, it provided accurate means, SDs, medians, and IQRs across all trials. Inferential statistics accuracy against expected statistical values varied with prompt specificity: 32.5% accuracy for “Basic” prompts, 81.3% for “Intermediate” prompts, and 92.5% for “Advanced” prompts.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>ChatGPT shows promise as a tool for exploratory data analysis, particularly for researchers with some statistical knowledge and limited programming expertise. However, its application requires careful prompt construction and human oversight to ensure accuracy. As a supplementary tool, ChatGPT can enhance data analysis efficiency and broaden research accessibility.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>ChatGPT</kwd>
        <kwd>data analysis</kwd>
        <kwd>statistics</kwd>
        <kwd>chatbot</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>biomedical research</kwd>
        <kwd>programmers</kwd>
        <kwd>bioinformatics</kwd>
        <kwd>data processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>ChatGPT is a conversational artificial intelligence (AI) created by OpenAI. It has quickly become an invaluable resource for researchers with capabilities that include reviewing literature, identifying gaps in research, and drafting papers [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. With the recent addition of Python code interpretation to the ChatGPT environment, a surge of new research applications has emerged, particularly in data analysis [<xref ref-type="bibr" rid="ref6">6</xref>]. Accompanied by innovations such as data upload and download, this new feature marks a considerable advancement toward individualized AI-assisted data analysis.</p>
      <p>The accessibility of ChatGPT has the potential to democratize data analysis for nonspecialists and serve as a bridge between programming knowledge and the growing demands of biomedical research [<xref ref-type="bibr" rid="ref7">7</xref>]. Previously, ChatGPT had proven to be an asset to programmers, with capabilities ranging from debugging and annotating code to translating between coding languages [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. However, for individuals without coding experience, deploying a local programming environment posed a significant challenge [<xref ref-type="bibr" rid="ref7">7</xref>]. ChatGPT’s ability to interpret code bypasses this barrier to entry, making language-to-code translation more accessible [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>ChatGPT has excelled in some recent data analysis applications by completing bioinformatics exercises with high accuracy [<xref ref-type="bibr" rid="ref6">6</xref>]. A preliminary analysis of ChatGPT’s utility as a data analysis tool found that it provided results consistent with traditional biostatistical software [<xref ref-type="bibr" rid="ref11">11</xref>]. However, the extent to which ChatGPT can assist with data analysis when the research question is cross-disciplinary remains unclear. For example, one evaluation found ChatGPT’s utility to be limited in addressing complex bioinformatics tasks, citing restricted file size and exclusive support for Python as key obstacles [<xref ref-type="bibr" rid="ref7">7</xref>]. Other studies have recommended against using ChatGPT for statistical analysis due to incorrect answers, mislabeled data, and speculative results [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Continual improvements to the GPT models and refined prompts may improve the value proposition of ChatGPT in biostatistics.</p>
      <p>As the capabilities of AI technology expand, the next step in democratizing research is to develop ChatGPT’s applications for basic statistical analysis. Our research aims to provide a framework for performing preliminary inferential and descriptive statistics, as well as data management within ChatGPT. The ultimate goal is to foster a more equitable research landscape without the barriers presented by coding knowledge, thereby broadening access and innovation within the research community.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Dataset and Analyses</title>
        <p>The 2019 National Inpatient Sample data from the National Healthcare Cost and Utilization Project was selected to assess ChatGPT’s capabilities. This dataset was chosen to represent real-world observational data, similar to typical clinical research studies. Also, these data require processing before testing an array of hypotheses with various statistical tests. This is not a public-use dataset. For researchers interested in replicating these methods, this dataset is often available through institutional access.</p>
        <p>Arbitrary inclusion criteria were selected to make the data both manageable and relevant to the aims of this study. ChatGPT’s data and processing limits required uploaded files to be less than 100 megabytes. The inclusion criteria were individuals aged 41-70 years who endured both a cerebral infarct and a myocardial infarction, with total hospital charges less than US $400,000. The resulting dataset included 2740 observations, each with 5 attributes: age, gender, race, length of stay, and total charges. The 5 attributes included continuous, categorical, and binary data elements.</p>
        <p>Common statistical methods for univariate analysis were used to evaluate ChatGPT’s capabilities, including both descriptive and inferential statistics. Data processing, categorization, and tabulation were included as assessments of data management. Each assessment was conducted in a new GPT-4 window with the memory feature disabled. For each iteration, the Microsoft Excel data file and prompt were entered sequentially into the same ChatGPT window.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The National Healthcare Cost and Utilization Project is a limited dataset, and it was deemed exempt by the institutional review board at the University of Arizona.</p>
      </sec>
      <sec>
        <title>Data Management and Descriptive Statistics</title>
        <p>For data processing, ChatGPT was prompted to create a new variable, reclassifying “race” from 4 dimensions (White, Black, Hispanic, and Other) to 2 dimensions (White and non-White). ChatGPT was also prompted to create a subset of patients aged 45 years to facilitate nonparametric testing later on. The decision to select 45-year-olds within the original sample was arbitrary, intended solely to create a smaller subpopulation suitable for nonparametric tests. For categorization, ChatGPT was prompted to categorize patients into 3 age cohorts (41-50, 51-60, and 61-70 years). Finally, data tabulation involved creating a series of tables to display the frequencies for the variables used in the categorization and processing steps. The prompts used for each element of data processing and categorization can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>For descriptive statistics, the methods tested were mean, SD, median, and IQR. ChatGPT was prompted to compute these values for male and female cohorts, as well as the entire dataset for the following variables: age, length of stay, and total charges. All descriptive statistics prompts were posed as directed questions without opportunity for interpretation. Descriptive statistics prompts can be found in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Inferential Statistics</title>
        <p>Inferential statistics were performed as both parametric tests (chi-square, Pearson correlation, independent 2-sample <italic>t</italic> test, and 1-way ANOVA) and equivalent nonparametric tests (Fisher exact, Spearman rank order correlation, Mann-Whitney <italic>U</italic> test, and Kruskal-Wallis <italic>H</italic> test). The pairings of statistical tests are displayed in <xref ref-type="table" rid="table1">Table 1</xref>. A series of research questions were developed to guide statistical method selection (MS) toward a specific test, as shown in <xref ref-type="table" rid="table2">Table 2</xref>. Once refined, each research question was posed to ensure that a 2-tailed analysis was conducted, rather than a 1-tailed analysis. The research questions and their proposed analytical tests were selected by one of the authors, a biostatistician, within the medical school.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Parametric and nonparametric statistical analysis pairs.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Parametric test</td>
                <td>Nonparametric test</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Chi-square<sup>a</sup></td>
                <td>Fisher exact</td>
              </tr>
              <tr valign="top">
                <td>Pearson correlation</td>
                <td>Spearman rank order correlation</td>
              </tr>
              <tr valign="top">
                <td>Independent 2-sample <italic>t</italic> test</td>
                <td>Mann-Whitney <italic>U</italic></td>
              </tr>
              <tr valign="top">
                <td>1-way ANOVA</td>
                <td>Kruskal-Wallis <italic>H</italic></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Chi-square is a nonparametric test, as it does not require the assumption of normality. However, like the other tests listed in the “Parametric test” column, chi-square is better suited for larger sample sizes compared with its counterpart in the “Nonparametric test” column.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Research questions and expected results.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="240"/>
            <col width="230"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Question</td>
                <td>Method selection</td>
                <td>Statistical assumptions</td>
                <td>Statistical values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Is the distribution of White and non-White patients the same across genders?</td>
                <td>Chi-square</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Independence</p>
                    </list-item>
                    <list-item>
                      <p>Expected values ≥5</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p><italic>χ</italic><sup>2</sup> (<italic>df</italic>) → 1.385 (1)</p>
                    </list-item>
                    <list-item>
                      <p><italic>P</italic>=.24</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Is the distribution of White and non-White patients the same across genders for individuals who are 45 years old?</td>
                <td>Fisher exact</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Independence</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Odds ratio=2.045</p>
                    </list-item>
                    <list-item>
                      <p><italic>P</italic>=.66</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Is there a significant correlation between total charges and length of stay?<sup>a</sup></td>
                <td>Pearson correlation</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Linearity</p>
                    </list-item>
                    <list-item>
                      <p>Normality</p>
                    </list-item>
                    <list-item>
                      <p>Homoscedasticity</p>
                    </list-item>
                    <list-item>
                      <p>No extreme outliers</p>
                    </list-item>
                    <list-item>
                      <p>Paired data</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p><italic>r</italic>=0.842</p>
                    </list-item>
                    <list-item>
                      <p><italic>P</italic>&lt;.001</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Is there a significant correlation between total charges and length of stay for individuals who are 45 years old?</td>
                <td>Spearman rank order correlation</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Monotonicity</p>
                    </list-item>
                    <list-item>
                      <p>Nonnominal data</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>ρ=0.447</p>
                    </list-item>
                    <list-item>
                      <p><italic>P</italic>=.02</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Is there a significant difference in total charges between men and women?</td>
                <td>Independent 2-sample <italic>t</italic> test</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Independence</p>
                    </list-item>
                    <list-item>
                      <p>Normality</p>
                    </list-item>
                    <list-item>
                      <p>Homoscedasticity</p>
                    </list-item>
                    <list-item>
                      <p>No extreme outliers</p>
                    </list-item>
                    <list-item>
                      <p>Continuous data</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p><italic>t</italic>-stat (<italic>df</italic>) → 0.185 (2707)</p>
                    </list-item>
                    <list-item>
                      <p><italic>P</italic>=.85</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Is there a significant difference in total charges between men and women who are 45 years old?</td>
                <td>Mann-Whitney <italic>U</italic></td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Independence</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p><italic>U</italic>-stat=91.000<sup>b</sup></p>
                    </list-item>
                    <list-item>
                      <p><italic>P</italic>=.17</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Are there significant differences in length of stay across race categories?</td>
                <td>1-way ANOVA</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Independence</p>
                    </list-item>
                    <list-item>
                      <p>Normality</p>
                    </list-item>
                    <list-item>
                      <p>Homoscedasticity</p>
                    </list-item>
                    <list-item>
                      <p>No extreme outliers</p>
                    </list-item>
                    <list-item>
                      <p>Continuous data</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p><italic>F</italic>-stat=4.092</p>
                    </list-item>
                    <list-item>
                      <p><italic>P</italic>=.007</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Are there significant differences in length of stay across race categories<sup>c</sup> for individuals who are 45 years old?</td>
                <td>Kruskal-Wallis <italic>H</italic></td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Independence</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p><italic>H</italic>-stat=1.984</p>
                    </list-item>
                    <list-item>
                      <p><italic>P</italic>=.37</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>The variables were transformed for this test to improve normality and homoscedasticity.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>The <italic>U</italic>-stat was computed in Python and RStudio only, as SAS does not produce an equivalent value.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>The Hispanic group was excluded from this test due to insufficient observations.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The prompts used for inferential statistics were formulated at 3 levels of increasing specificity, reflecting a user’s familiarity with statistics, ChatGPT, and Python. The lowest level was “Basic,” where ChatGPT was provided with the research question, relevant variables, and necessary tasks. <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> shows a template used for “Basic” prompts. The “Intermediate” level used prompts that were more specific than “Basic” and included additional guidelines, including data cleaning steps and strategies for assessing the statistical assumptions (SA). The “Advanced” level prompted ChatGPT with the same information as “Intermediate” and a recommended statistical test. An overview of the components for each level of prompt is provided in <xref ref-type="table" rid="table3">Table 3</xref>. The individual prompts used for each test can be found in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <boxed-text id="box1" position="float">
          <title>A “Basic” template prompt for inferential statistics.</title>
          <p>I am analyzing variables related to hospital visits and demographics to answer the question, “_______?” The relevant variables are as follows:</p>
          <list list-type="bullet">
            <list-item>
              <p>x (variable_type): description</p>
            </list-item>
            <list-item>
              <p>y (variable_type): description</p>
            </list-item>
          </list>
          <p>Embody the role of an experienced biostatistician and complete the following tasks:</p>
          <list list-type="bullet">
            <list-item>
              <p>Suggest the most relevant statistical method for analyzing this dataset.</p>
            </list-item>
            <list-item>
              <p>List and verify all of the critical assumptions that must be met to perform this statistical method.</p>
            </list-item>
            <list-item>
              <p>If any critical assumptions of the primary test are not met, identify a more appropriate alternative analysis. When suggesting a new test, list and verify the assumptions for this new test.</p>
            </list-item>
            <list-item>
              <p>Perform the most appropriate test and provide test statistics and <italic>P</italic> values to 3 decimal places.</p>
            </list-item>
          </list>
        </boxed-text>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Components of inferential statistics prompts at varying knowledge levels.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Basic</td>
                <td>Intermediate</td>
                <td>Advanced</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Research question</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Variables and variable types</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Data cleaning step</td>
                <td>
                  <break/>
                </td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Strategies to assess assumptions</td>
                <td>
                  <break/>
                </td>
                <td>✓</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Suggested method</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Tasks to perform (MS<sup>a</sup>, SA<sup>b</sup>, SV<sup>c</sup>)</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>MS: method selection.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>SA: statistical assumptions.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>SV: statistical values.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Response Grading</title>
        <p>The data analysis tasks were also performed in Python (Python Software Foundation), SAS (SAS Institute), and RStudio (Posit PBC) to generate an expected statistical output for comparison. For data processing, data categorization, data tabulation, and descriptive statistics, the expected statistical values (SV) can be found in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
        <p>For inferential statistics, the expected statistical results are in the SV column of <xref ref-type="table" rid="table2">Table 2</xref>. The results were consistent among Python, SAS, and RStudio. Had any discrepancies occurred due to algorithmic variations among the platforms, the Python output would have been designated as the “gold standard,” as ChatGPT uses this language for its calculations.</p>
        <p>The measured outcomes were data processing, data categorization, data tabulation, descriptive statistics, and inferential statistics. A trial consisted of a single interaction with ChatGPT, including one user input (prompt) and one ChatGPT output. Data processing and categorization were assessed through a direct comparison of the frequencies generated by ChatGPT to those in Python, SAS, and RStudio, each across 10 trials. Data tabulation was assessed through ChatGPT’s ability to provide the correct columns, rows, and row totals during the processing and categorization trials (20 trials total).</p>
        <p>Descriptive statistics were assessed through a direct comparison of the mean, SD, median, and IQR generated by ChatGPT to the expected values across 10 trials. Inferential statistics tests were evaluated on 3 criteria: MS, SA, and SV across 10 trials per statistical method (80 trials total). MS involved selecting the expected method, SA involved reporting the underlying assumptions critical to the statistical test, and SV involved calculating the test statistics and <italic>P</italic> values for the selected statistical method.</p>
        <p>For data processing, data categorization, data tabulation, and descriptive statistics, the trial was coded as correct only if all values from ChatGPT’s output were within 1% of the expected values. For inferential statistics, MS was coded as correct only if the final test selected by ChatGPT matched the expected test, SA was coded as correct only if the expected assumptions were assessed implicitly or explicitly, and SV was coded as correct only if both the test statistic and <italic>P</italic> value were within 1% of the expected output. A flowchart detailing how all trials were assessed is available in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p>
        <p>All measured outcomes were coded as either correct or incorrect. Only a single prompt was provided to ChatGPT in each trial, with no further interaction. If ChatGPT requested further user input or provided multiple answers without indicating the best choice, that trial was coded as incorrect. If ChatGPT timed out during code interpretation, that trial was discarded. During inferential statistics trials, if MS was incorrect, then SA and SV were coded as incorrect as well. The choice of 10 trials per outcome was informed by previous studies on ChatGPT’s performance in data analysis tasks, most of which assessed ChatGPT’s performance in fewer trials [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. In the absence of an established standard, 10 trials were selected to capture variability in ChatGPT’s responses.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Management and Descriptive Statistics</title>
        <p>ChatGPT performed data processing, categorization, and tabulation correctly across all trials. Variations in table aesthetics were inconsequential to the measured outcomes. For descriptive statistics, ChatGPT provided accurate means, SDs, medians, and IQRs for the 3 continuous variables across all attempts. The compiled tables and descriptive statistics for each variable can be found in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
      </sec>
      <sec>
        <title>Inferential Statistics</title>
        <p>When provided a “Basic” prompt, ChatGPT achieved 47.5% accuracy (38/80 attempts) on MS, 43.8% accuracy (35/80 attempts) on SA, and 32.5% accuracy (26/80 attempts) on SV. With “Intermediate” prompts, ChatGPT achieved 85.0% accuracy (68/80 attempts) on both MS and SA and 81.3% accuracy (65/80 attempts) on SV. With “Advanced” prompts, ChatGPT achieved 92.5% accuracy (74/80 attempts) across all MS, SA, and SV assessments. These results are displayed in <xref ref-type="table" rid="table4">Table 4</xref> and <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Results for 10 attempts at inferential statistics with prompts at varying levels of specificity.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="280"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="0"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="0"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>Test</td>
                <td colspan="4">Basic</td>
                <td colspan="4">Intermediate</td>
                <td colspan="3">Advanced</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>MS<sup>a</sup></td>
                <td>SA<sup>b</sup></td>
                <td>SV<sup>c</sup></td>
                <td colspan="2">MS</td>
                <td>SA</td>
                <td>SV</td>
                <td colspan="2">MS</td>
                <td>SA</td>
                <td>SV</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Chi-square</td>
                <td>10</td>
                <td>10</td>
                <td>0</td>
                <td colspan="2">10</td>
                <td>10</td>
                <td>7</td>
                <td colspan="2">10</td>
                <td>10</td>
                <td>10</td>
              </tr>
              <tr valign="top">
                <td>Fisher</td>
                <td>9</td>
                <td>9</td>
                <td>9</td>
                <td colspan="2">10</td>
                <td>10</td>
                <td>10</td>
                <td colspan="2">10</td>
                <td>10</td>
                <td>10</td>
              </tr>
              <tr valign="top">
                <td>Pearson</td>
                <td>2</td>
                <td>2</td>
                <td>2</td>
                <td colspan="2">8</td>
                <td>8</td>
                <td>8</td>
                <td colspan="2">8</td>
                <td>8</td>
                <td>8</td>
              </tr>
              <tr valign="top">
                <td>Spearman</td>
                <td>9</td>
                <td>6</td>
                <td>8</td>
                <td colspan="2">7</td>
                <td>7</td>
                <td>7</td>
                <td colspan="2">10</td>
                <td>10</td>
                <td>10</td>
              </tr>
              <tr valign="top">
                <td><italic>t</italic> test</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td colspan="2">10</td>
                <td>10</td>
                <td>10</td>
                <td colspan="2">9</td>
                <td>9</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td>Mann-Whitney</td>
                <td>7</td>
                <td>7</td>
                <td>6</td>
                <td colspan="2">8</td>
                <td>8</td>
                <td>8</td>
                <td colspan="2">9</td>
                <td>9</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td>ANOVA</td>
                <td>1</td>
                <td>1</td>
                <td>1</td>
                <td colspan="2">7</td>
                <td>7</td>
                <td>7</td>
                <td colspan="2">8</td>
                <td>8</td>
                <td>8</td>
              </tr>
              <tr valign="top">
                <td>Kruskal-Wallis</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td colspan="2">8</td>
                <td>8</td>
                <td>8</td>
                <td colspan="2">10</td>
                <td>10</td>
                <td>10</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>MS: method selection.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>SA: statistical assumptions.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>SV: statistical values.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>ChatGPT accuracy by prompt specificity.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e63550_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The errors encountered by ChatGPT across all inferential statistics trials included incorrect MS (n=51), coding error (n=13), incomplete response (n=9), incomplete assumptions (n=2), median imputed for missing value (n=1), and outliers incorrectly removed (n=1). A complete list of errors is provided in <xref ref-type="table" rid="table5">Table 5</xref>. Coding errors were limited to chi-square trials only and occurred due to a feature of the Python function. This function, chi2_contingency, contains a parameter for performing Yates correction, a method to prevent overestimation with small samples. As the parameter defaults to true, ChatGPT performed this correction despite the large sample size.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Frequency of errors encountered by ChatGPT during inferential statistics trials.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Test</td>
                <td>Basic</td>
                <td>Intermediate</td>
                <td>Advanced</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Chi-square</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Coding error: 10</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Coding error: 3</p>
                    </list-item>
                  </list>
                </td>
                <td>—<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>Fisher</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 1</p>
                    </list-item>
                  </list>
                </td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Pearson</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 7</p>
                    </list-item>
                    <list-item>
                      <p>Incomplete response: 1</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 2</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 2</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Spearman</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incomplete assumptions: 2</p>
                    </list-item>
                    <list-item>
                      <p>Incomplete response: 1</p>
                    </list-item>
                    <list-item>
                      <p>Imputed value: 1</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 2</p>
                    </list-item>
                    <list-item>
                      <p>Incomplete response: 1</p>
                    </list-item>
                  </list>
                </td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td><italic>t</italic> test</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 10</p>
                    </list-item>
                  </list>
                </td>
                <td>—</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 1</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Mann-Whitney</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incomplete response: 3</p>
                    </list-item>
                    <list-item>
                      <p>Outliers removed: 1</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 2</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incomplete response: 1</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>ANOVA</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 8</p>
                    </list-item>
                    <list-item>
                      <p>Incomplete response: 1</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 3</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 2</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Kruskal-Wallis</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 10</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Incorrect method: 1</p>
                    </list-item>
                    <list-item>
                      <p>Incomplete response: 1</p>
                    </list-item>
                  </list>
                </td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our investigation into the analytical capabilities of ChatGPT revealed a nuanced understanding of its implementation that, while promising, exhibits limitations that warrant careful consideration. ChatGPT’s performance demonstrated proficiency in data processing, data categorization, data tabulation, and descriptive statistics. The most specific prompts improved response accuracy across inferential statistics trials. Based on our data, “Basic” prompts provided little value, with a low overall accuracy across all trials. Meanwhile, “Intermediate” prompts resulted in a similar overall accuracy compared with “Advanced” prompts despite the addition of a suggested statistical test in the “Advanced” prompts.</p>
        <p>The variation observed across trials reflects the probabilistic nature and inherent unpredictability of ChatGPT [<xref ref-type="bibr" rid="ref15">15</xref>]. Evaluating its accuracy as a tool requires repeated testing [<xref ref-type="bibr" rid="ref16">16</xref>]. Given that GPT-4 has over 1 trillion parameters [<xref ref-type="bibr" rid="ref17">17</xref>], a definitive assessment of its performance is nearly impossible, as it would require countless trials to capture all possible response variations. This could explain the anomalies in the <italic>t</italic> test and Spearman trials, where less specific prompts sometimes outperformed more detailed ones.</p>
        <p>The calculation errors from ChatGPT likely stem from challenges in implementing user instructions and interpreting intermediary outputs, rather than inherent issues with the Python-based analytical frameworks. With intentional instructions, most inaccuracies can be avoided. Although MS was the most common source of error, ChatGPT’s responses included sufficient information for users to verify the suitability of the statistical method. Furthermore, in the “Intermediate” and “Advanced” trials, if ChatGPT selected the correct method, the remaining tasks were frequently completed accurately. Precise and specific prompts generally enhance accuracy; however, erroneous instructions may introduce further inaccuracies, as ChatGPT heavily relies on user input. Successful use of ChatGPT as a statistical doula requires balancing clear analysis goals with the flexibility needed for ChatGPT to present accurate information, without inadvertently introducing bias.</p>
        <p>The power and democratization of ChatGPT allow for collaboration to further mitigate concerns about the analytical process. Although these trials were based on single prompts, a series of follow-up prompts by a user can identify determinations with regard to the data, the proposed analysis, or the code. For example, when asked about the Yates correction, ChatGPT acknowledged its use for small samples. To prevent errors from defaulted values like the Yates parameter, prompts should instruct ChatGPT to specify a function’s parameters and provide the rationale for setting each parameter. This method was incorporated into the “Intermediate” and “Advanced” trials.</p>
        <p>ChatGPT will inevitably produce inaccurate results that, if not carefully verified, may be detrimental to the research community. Therefore, all ChatGPT-generated analyses should be approached with caution. Based on this 10-trial approach, researchers are encouraged to incorporate multiple trials of the requested analysis in the statistical workflow, using instructions that are as specific as possible. ChatGPT is not intended to be a standalone tool for data analysis; consultation with a biostatistician is essential for validating statistical approaches and ensuring reliable results.</p>
        <p>Our dataset and assessments have limitations. Although we selected a moderately sized sample, it is still small relative to datasets used in complex analyses and may not assess the extent of ChatGPT’s capabilities. In this report, the statistical analyses focused on univariate comparisons; the capabilities for bivariate data analysis remain to be validated. Furthermore, due to the probabilistic nature of ChatGPT, a definitive performance assessment would require thousands of trials. Finally, we did not assess the validity of ChatGPT’s statements outside of the chosen measures. When ChatGPT provided correct, incorrect, or extraneous information in the output, we did not count that against the assessment of accuracy to replicate the expected analysis.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>ChatGPT has significant potential as a tool for exploratory data analysis, particularly for researchers who have some statistical knowledge but limited programming expertise. This paper is intended for individuals with a foundational understanding of statistics who could generate “Intermediate” prompts on their own. We hope that these individuals may use ChatGPT to perform preliminary analyses, helping them to understand their data, draw initial insights, and begin writing their papers while waiting for consultation with a statistician. These preliminary analyses are not meant to replace expert review but to accelerate the research process. Furthermore, ChatGPT may serve as an educational resource, helping researchers better understand statistical analyses and tackle unique problems [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
        <p>Further advancements to ChatGPT will undoubtedly enhance its applicability and accuracy in statistical analysis. Data visualization is a crucial component of data analysis, and although still limited, ChatGPT’s ability to process visual inputs through its “Vision” feature has already shown promise in interpreting statistical figures [<xref ref-type="bibr" rid="ref19">19</xref>]. Furthermore, OpenAI is rapidly advancing its GPT models, and GPT-o1, the latest version, is already available for preview. This new model is reportedly better suited to “reason through complex tasks and solve more difficult problems in science, coding, and math” [<xref ref-type="bibr" rid="ref20">20</xref>]. Improvements to both “Vision” and the GPT model will solidify ChatGPT’s role as an asset for researchers.</p>
        <p>We encourage researchers to leverage ChatGPT for the programming aspects of their statistical work while leaving the critical decisions to human expertise. By doing so, they can harness the full potential of ChatGPT as a supplementary tool in their research arsenal, ensuring that its application is both productive and scientifically sound. For researchers interested in applying ChatGPT to data analysis, we recommend following the best practices outlined in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>.</p>
        <boxed-text id="box2" position="float">
          <title>Best practices for performing data analysis with ChatGPT.</title>
          <p>
            <bold>Prompting</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Prompts should have specific and comprehensive goals. Researchers can benefit from creating a flexible template that can be adapted across different data analysis tasks. Additional details relevant to the current query should be included as needed. For those using ChatGPT for inferential statistics, prompts should, at a minimum, resemble the level of detail used in our “Intermediate” trials.</p>
            </list-item>
          </list>
          <p>
            <bold>Refinement</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Approach data analysis like a discussion. Engage ChatGPT by asking it to identify any potential gaps in its knowledge or the inputs provided. Request feedback on how prompts can be improved for future trials. Ask ChatGPT to explain any decisions or necessary parameters.</p>
            </list-item>
          </list>
          <p>
            <bold>Consistency</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>At a minimum, attempt each trial 3 times to verify its consistency.</p>
            </list-item>
          </list>
          <p>
            <bold>Verification</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>ChatGPT-generated data analysis can be used to start drafting a paper before consulting a statistician. However, ensure that all statistical outputs and interpretations are reviewed by a statistician before publication.</p>
            </list-item>
          </list>
          <p>
            <bold>Transparency</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Be transparent about using ChatGPT to assist with data analysis. Consider sharing the prompts used in your analysis to allow readers to replicate or refine the process.</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Data processing and categorization prompts.</p>
        <media xlink:href="jmir_v27i1e63550_app1.docx" xlink:title="DOCX File, 8 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Descriptive statistics prompts.</p>
        <media xlink:href="jmir_v27i1e63550_app2.docx" xlink:title="DOCX File, 7 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Inferential statistics prompts at varying levels of specificity.</p>
        <media xlink:href="jmir_v27i1e63550_app3.docx" xlink:title="DOCX File, 16 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Data tabulation and descriptive statistics results.</p>
        <media xlink:href="jmir_v27i1e63550_app4.docx" xlink:title="DOCX File, 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Trial design flowchart.</p>
        <media xlink:href="jmir_v27i1e63550_app5.docx" xlink:title="DOCX File, 475 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">MS</term>
          <def>
            <p>method selection</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">SA</term>
          <def>
            <p>statistical assumptions</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">SV</term>
          <def>
            <p>statistical values</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="con">
        <p>MRR and CI contributed to the study conception. MRR and TG handled the literature search, drafting, and data collection. CI and JL performed critical revisions.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bhargava</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Jadav</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Meshram</surname>
              <given-names>VP</given-names>
            </name>
            <name name-style="western">
              <surname>Kanchan</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT in medical research: challenging time ahead</article-title>
          <source>Med Leg J</source>
          <year>2023</year>
          <volume>91</volume>
          <issue>4</issue>
          <fpage>223</fpage>
          <lpage>225</lpage>
          <pub-id pub-id-type="doi">10.1177/00258172231184548</pub-id>
          <pub-id pub-id-type="medline">37802491</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gödde</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nöhl</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rupert</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rimkus</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ehlers</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Breuckmann</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sellmann</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>A SWOT (Strengths, Weaknesses, Opportunities, and Threats) analysis of ChatGPT in the medical literature: concise review</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <volume>25</volume>
          <fpage>e49368</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e49368/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/49368</pub-id>
          <pub-id pub-id-type="medline">37865883</pub-id>
          <pub-id pub-id-type="pii">v25i1e49368</pub-id>
          <pub-id pub-id-type="pmcid">PMC10690535</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khlaif</surname>
              <given-names>ZN</given-names>
            </name>
            <name name-style="western">
              <surname>Mousa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hattab</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Itmazi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Sanmugam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ayyoub</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The potential and concerns of using AI in scientific research: ChatGPT performance evaluation</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <volume>9</volume>
          <fpage>e47049</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e47049/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/47049</pub-id>
          <pub-id pub-id-type="medline">37707884</pub-id>
          <pub-id pub-id-type="pii">v9i1e47049</pub-id>
          <pub-id pub-id-type="pmcid">PMC10636627</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sallam</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns</article-title>
          <source>Healthcare (Basel)</source>
          <year>2023</year>
          <volume>11</volume>
          <issue>6</issue>
          <fpage>887</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=healthcare11060887"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/healthcare11060887</pub-id>
          <pub-id pub-id-type="medline">36981544</pub-id>
          <pub-id pub-id-type="pii">healthcare11060887</pub-id>
          <pub-id pub-id-type="pmcid">PMC10048148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Dis</surname>
              <given-names>EAM</given-names>
            </name>
            <name name-style="western">
              <surname>Bollen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zuidema</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>van Rooij</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bockting</surname>
              <given-names>CL</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT: five priorities for research</article-title>
          <source>Nature</source>
          <year>2023</year>
          <volume>614</volume>
          <issue>7947</issue>
          <fpage>224</fpage>
          <lpage>226</lpage>
          <pub-id pub-id-type="doi">10.1038/d41586-023-00288-7</pub-id>
          <pub-id pub-id-type="medline">36737653</pub-id>
          <pub-id pub-id-type="pii">10.1038/d41586-023-00288-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Piccolo</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Luxton-Reilly</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Payne</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Ridge</surname>
              <given-names>PG</given-names>
            </name>
          </person-group>
          <article-title>Evaluating a large language model's ability to solve programming exercises from an introductory bioinformatics course</article-title>
          <source>PLoS Comput Biol</source>
          <year>2023</year>
          <volume>19</volume>
          <issue>9</issue>
          <fpage>e1011511</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pcbi.1011511"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1011511</pub-id>
          <pub-id pub-id-type="medline">37769024</pub-id>
          <pub-id pub-id-type="pii">PCOMPBIOL-D-23-00520</pub-id>
          <pub-id pub-id-type="pmcid">PMC10564134</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Code interpreter for bioinformatics: are we there yet?</article-title>
          <source>Ann Biomed Eng</source>
          <year>2024</year>
          <volume>52</volume>
          <issue>4</issue>
          <fpage>754</fpage>
          <lpage>756</lpage>
          <pub-id pub-id-type="doi">10.1007/s10439-023-03324-9</pub-id>
          <pub-id pub-id-type="medline">37482573</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10439-023-03324-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perkel</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Six tips for better coding with ChatGPT</article-title>
          <source>Nature</source>
          <year>2023</year>
          <volume>618</volume>
          <issue>7964</issue>
          <fpage>422</fpage>
          <lpage>423</lpage>
          <pub-id pub-id-type="doi">10.1038/d41586-023-01833-0</pub-id>
          <pub-id pub-id-type="medline">37277596</pub-id>
          <pub-id pub-id-type="pii">10.1038/d41586-023-01833-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shue</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Empowering beginners in bioinformatics with ChatGPT</article-title>
          <source>Quant Biol</source>
          <year>2023</year>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>105</fpage>
          <lpage>108</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37378043"/>
          </comment>
          <pub-id pub-id-type="doi">10.15302/j-qb-023-0327</pub-id>
          <pub-id pub-id-type="medline">37378043</pub-id>
          <pub-id pub-id-type="pmcid">PMC10299548</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT opens a new door for bioinformatics</article-title>
          <source>Quant Biol</source>
          <year>2023</year>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>204</fpage>
          <lpage>206</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37900935"/>
          </comment>
          <pub-id pub-id-type="doi">10.15302/j-qb-023-0328</pub-id>
          <pub-id pub-id-type="medline">37900935</pub-id>
          <pub-id pub-id-type="pmcid">PMC10609615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Evaluating ChatGPT-4.0's data analytic proficiency in epidemiological studies: a comparative analysis with SAS, SPSS, and R</article-title>
          <source>J Glob Health</source>
          <year>2024</year>
          <volume>14</volume>
          <fpage>04070</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38547497"/>
          </comment>
          <pub-id pub-id-type="doi">10.7189/jogh.14.04070</pub-id>
          <pub-id pub-id-type="medline">38547497</pub-id>
          <pub-id pub-id-type="pmcid">PMC10978058</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ignjatović</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stevanović</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Efficacy and limitations of ChatGPT as a biostatistical problem-solving tool in medical education in Serbia: a descriptive study</article-title>
          <source>J Educ Eval Health Prof</source>
          <year>2023</year>
          <volume>20</volume>
          <fpage>28</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37840252"/>
          </comment>
          <pub-id pub-id-type="doi">10.3352/jeehp.2023.20.28</pub-id>
          <pub-id pub-id-type="medline">37840252</pub-id>
          <pub-id pub-id-type="pii">jeehp.2023.20.28</pub-id>
          <pub-id pub-id-type="pmcid">PMC10646144</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jahangiri</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Can Chat Generative Pretraining Transformer (ChatGPT) be used for statistical analysis of research data?</article-title>
          <source>J Vasc Interv Radiol</source>
          <year>2023</year>
          <volume>34</volume>
          <issue>12</issue>
          <fpage>2242</fpage>
          <lpage>2246.e2</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jvir.2023.09.010</pub-id>
          <pub-id pub-id-type="medline">37717655</pub-id>
          <pub-id pub-id-type="pii">S1051-0443(23)00669-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ordak</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT's skills in statistical analysis using the example of allergology: do we have reason for concern?</article-title>
          <source>Healthcare (Basel)</source>
          <year>2023</year>
          <volume>11</volume>
          <issue>18</issue>
          <fpage>2554</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=healthcare11182554"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/healthcare11182554</pub-id>
          <pub-id pub-id-type="medline">37761751</pub-id>
          <pub-id pub-id-type="pii">healthcare11182554</pub-id>
          <pub-id pub-id-type="pmcid">PMC10530997</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruksakulpiwat</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Phianhasin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Benjasirisan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ajibade</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Assessing the efficacy of ChatGPT versus human researchers in identifying relevant studies on mHealth interventions for improving medication adherence in patients with ischemic stroke when conducting systematic reviews: comparative analysis</article-title>
          <source>JMIR mHealth uHealth</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>e51526</fpage>
          <pub-id pub-id-type="doi">10.2196/51526</pub-id>
          <pub-id pub-id-type="medline">38710069</pub-id>
          <pub-id pub-id-type="pii">v12i1e51526</pub-id>
          <pub-id pub-id-type="pmcid">PMC11106699</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The evaluation of generative AI should include repetition to assess stability</article-title>
          <source>JMIR mHealth uHealth</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>e57978</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2024/1/e57978/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/57978</pub-id>
          <pub-id pub-id-type="medline">38688841</pub-id>
          <pub-id pub-id-type="pii">v12i1e57978</pub-id>
          <pub-id pub-id-type="pmcid">PMC11106698</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A future of smarter digital health empowered by generative pretrained transformer</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <volume>25</volume>
          <fpage>e49963</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e49963/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/49963</pub-id>
          <pub-id pub-id-type="medline">37751243</pub-id>
          <pub-id pub-id-type="pii">v25i1e49963</pub-id>
          <pub-id pub-id-type="pmcid">PMC10565615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magalhaes Araujo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cruz-Correia</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Incorporating ChatGPT in medical informatics education: mixed methods study on student perceptions and experiential integration proposals</article-title>
          <source>JMIR Med Educ</source>
          <year>2024</year>
          <volume>10</volume>
          <fpage>e51151</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e51151/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51151</pub-id>
          <pub-id pub-id-type="medline">38506920</pub-id>
          <pub-id pub-id-type="pii">v10i1e51151</pub-id>
          <pub-id pub-id-type="pmcid">PMC10993110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Scientific figures interpreted by ChatGPT: strengths in plot recognition and limits in color perception</article-title>
          <source>NPJ Precis Oncol</source>
          <year>2024</year>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>84</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41698-024-00576-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41698-024-00576-z</pub-id>
          <pub-id pub-id-type="medline">38580746</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41698-024-00576-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC10997760</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>Introducing OpenAI o1-preview</article-title>
          <source>OpenAI</source>
          <year>2024</year>
          <access-date>2024-12-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/index/introducing-openai-o1-preview">https://openai.com/index/introducing-openai-o1-preview</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
