<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e51926</article-id>
      <article-id pub-id-type="pmid">38252483</article-id>
      <article-id pub-id-type="doi">10.2196/51926</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Uncovering Language Disparity of ChatGPT on Retinal Vascular Disease Classification: Cross-Sectional Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>de Azevedo Cardoso</surname>
            <given-names>Taiane</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>余</surname>
            <given-names>家瑞</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>He</surname>
            <given-names>Xianying</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gilson</surname>
            <given-names>Aidan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Xiaocong</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5323-2954</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Jiageng</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0984-0818</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Shao</surname>
            <given-names>An</given-names>
          </name>
          <degrees>MM</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1795-4877</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>Wenyue</given-names>
          </name>
          <degrees>MB</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1352-9419</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Ye</surname>
            <given-names>Panpan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8165-023X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Yao</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7258-6093</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Ye</surname>
            <given-names>Juan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1948-2500</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Jin</surname>
            <given-names>Kai</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Eye Center</institution>
            <institution>The Second Affiliated Hospital</institution>
            <institution>Zhejiang University</institution>
            <addr-line>88 Jiefang Road, Hangzhou</addr-line>
            <addr-line>Zhejiang, 310009</addr-line>
            <country>China</country>
            <phone>86 571 87783907</phone>
            <email>jinkai@zju.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4369-2417</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Jie</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5696-363X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Eye Center</institution>
        <institution>The Second Affiliated Hospital</institution>
        <institution>Zhejiang University</institution>
        <addr-line>Zhejiang</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Public Health</institution>
        <institution>Zhejiang University School of Medicine</institution>
        <addr-line>Zhejiang</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Kai Jin <email>jinkai@zju.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>1</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e51926</elocation-id>
      <history>
        <date date-type="received">
          <day>17</day>
          <month>8</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>20</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>7</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>30</day>
          <month>11</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Xiaocong Liu, Jiageng Wu, An Shao, Wenyue Shen, Panpan Ye, Yao Wang, Juan Ye, Kai Jin, Jie Yang. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 22.01.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e51926" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Benefiting from rich knowledge and the exceptional ability to understand text, large language models like ChatGPT have shown great potential in English clinical environments. However, the performance of ChatGPT in non-English clinical settings, as well as its reasoning, has not been explored in depth.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to evaluate ChatGPT’s diagnostic performance and inference abilities for retinal vascular diseases in a non-English clinical environment.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>In this cross-sectional study, we collected 1226 fundus fluorescein angiography reports and corresponding diagnoses written in Chinese and tested ChatGPT with 4 prompting strategies (direct diagnosis or diagnosis with a step-by-step reasoning process and in Chinese or English).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Compared with ChatGPT using Chinese prompts for direct diagnosis that achieved an <italic>F</italic><sub>1</sub>-score of 70.47%, ChatGPT using English prompts for direct diagnosis achieved the best diagnostic performance (80.05%), which was inferior to ophthalmologists (89.35%) but close to ophthalmology interns (82.69%). As for its inference abilities, although ChatGPT can derive a reasoning process with a low error rate (0.4 per report) for both Chinese and English prompts, ophthalmologists identified that the latter brought more reasoning steps with less incompleteness (44.31%), misinformation (1.96%), and hallucinations (0.59%) (all <italic>P</italic>&#60;.001). Also, analysis of the robustness of ChatGPT with different language prompts indicated significant differences in the recall (<italic>P</italic>=.03) and <italic>F</italic><sub>1</sub>-score (<italic>P</italic>=.04) between Chinese and English prompts. In short, when prompted in English, ChatGPT exhibited enhanced diagnostic and inference capabilities for retinal vascular disease classification based on Chinese fundus fluorescein angiography reports.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>ChatGPT can serve as a helpful medical assistant to provide diagnosis in non-English clinical environments, but there are still performance gaps, language disparities, and errors compared to professionals, which demonstrate the potential limitations and the need to continually explore more robust large language models in ophthalmology practice.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>large language models</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>clinical decision support</kwd>
        <kwd>retinal vascular disease</kwd>
        <kwd>artificial intelligence</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The global population of individuals with visual impairments exceeded 2.2 billion in 2019 and continues to rise [<xref ref-type="bibr" rid="ref1">1</xref>]. As the leading causes of blindness, retinal vascular diseases are characterized by a complex array of clinical manifestations [<xref ref-type="bibr" rid="ref2">2</xref>]. Fundus fluorescein angiography (FFA), which uses an injected fluorescent dye to examine circulation in the retina and choroid, is a specialized ophthalmic test used to visualize the retinal vasculature [<xref ref-type="bibr" rid="ref3">3</xref>]. In practice, interpreting FFA results and making a diagnosis requires laborious analysis by experienced ophthalmologists.</p>
      <p>In recent years, significant developments in deep learning approaches, which are extensively utilized, have rendered them a promising way for auxiliary diagnosis of retinal vascular diseases. The existing research has mainly focused on developing convolutional neural network algorithms for lesion detection in FFA images [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref8">8</xref>], such as microaneurysms, leakages, nonperfusion areas, and neovascularization. Further, some studies focused on automatically generating FFA reports [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>], which can highlight abnormalities for ophthalmologists and provide a theoretical basis for disease diagnosis. However, few studies were devoted to the diagnosis of retinal vascular disease based on FFA reports. The main challenges of using natural language processing to diagnose retinal vascular diseases can be summarized as follows: (1) different interpretations of FFA images by different ophthalmologists, (2) varied ophthalmological terms contained in FFA reports, (3) time-varying imaging features contained in FFA reports, and (4) smaller data volume caused by the high cost and possible side effects of FFA.</p>
      <p>Recently, large language models (LLMs) like ChatGPT [<xref ref-type="bibr" rid="ref11">11</xref>] have demonstrated exceptional performance in various tasks due to their rich internal knowledge and strong deductive reasoning abilities [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. However, the related research within the medical field primarily focuses on knowledge assessment [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref20">20</xref>], and a comprehensive evaluation of ChatGPT’s capabilities in ophthalmology for disease diagnosis is lacking. Additionally, although existing LLMs demonstrate impressive cross-language understanding abilities, they may lead to significant disparities in non–English-specific fields because they were primarily trained on English corpora [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Therefore, in this study, by exploring ChatGPT’s ability to understand Chinese FFA reports, our objectives were to evaluate ChatGPT’s diagnostic performance and inference abilities for retinal vascular diseases in a non-English clinical environment and to find appropriate prompt strategies under these scenarios.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Preparation</title>
        <p>We collected 1226 Chinese FFA reports and the corresponding clinical diagnoses of 728 patients from the Eye Center of the Second Affiliated Hospital of Zhejiang University (SAHZU) between August 2016 and September 2021. The clinical diagnosis of each eye was classified as either normal or one of the 6 primary retinal vascular diseases: diabetic retinopathy (DR), wet age-related macular degeneration (wetAMD), central serous chorioretinopathy (CSC), branch retinal vein occlusion (BRVO), central retinal vein occlusion (CRVO), and Vogt-Koyanagi-Harada disease (VKH). The clinical diagnosis was based on clinical information from the patients, primarily the FFA images and reports.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Ethical approval was obtained from the Ethics Committee in the SAHZU School of Medicine (2019-428). This research involves medical records data. We ensured that the medical records were deidentified and all private information was removed. The Institutional Review Board agreed to share access to the data with third parties, including sending it through application programming interfaces (APIs) provided by companies like OpenAI, or using it on web-based platforms like ChatGPT.</p>
      </sec>
      <sec>
        <title>Diagnosis of Retinal Vascular Diseases Using ChatGPT</title>
        <p>To diagnose the patient’s eye status based on the FFA report with ChatGPT, we designed a fixed instruction that concatenates the patient’s FFA report as the whole prompt for ChatGPT. The instruction consists of a specific task description and all alternative conditions. To fully exploit the potential of ChatGPT, we implemented different prompting strategies to investigate the potential effect and find the most appropriate way to apply it. First, we used the direct prompting strategy that requires ChatGPT to directly output the final option without explanations. Second, inspired by chain-of-thought prompting technology [<xref ref-type="bibr" rid="ref23">23</xref>], we adopted a step prompting strategy to elicit the detailed reasoning process, which provides interpretability for the disease diagnosis. Finally, ChatGPT was primarily trained on English corpora and may have difficulty recognizing instructions and FFA reports in Chinese, as well as making use of internal knowledge. Therefore, we also rewrote the prompts in English while keeping the FFA reports in Chinese to conduct a code-switching prompt examination. In total, we mainly investigated 4 prompt strategies: Direct-Chinese, Step-Chinese, Direct-English, and Step-English. The detailed prompts can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>To avoid the randomness of ChatGPT’s response, we set the inference temperature to 0 so as to choose the greedy decoding strategy via the API, making the response more focused and deterministic. Furthermore, we evaluated the robustness of ChatGPT to different languages by calculating the average performance of ChatGPT using different prompting methods. All tests were conducted on the same version of GPT3.5-Turbo-0301 using the official API of OpenAI. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the overall workflow.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The overall workflow.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e51926_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Measurements and Definitions</title>
        <p>We designed a systematic evaluation scheme to assess the performance of ChatGPT. In addition to diagnostic performance, we also incorporated a combination of inference ability, omission of information, hallucinations, misinformation, and inconsistency assessments to evaluate ChatGPT’s reasoning process, as follows:</p>
        <list list-type="order">
          <list-item>
            <p>Diagnostic performance: precision, recall, and <italic>F</italic><sub>1</sub>-score.</p>
          </list-item>
          <list-item>
            <p>Inference ability [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]: the total reasoning steps, the number of reasoning errors, and the incompleteness of the inference process.</p>
          </list-item>
          <list-item>
            <p>Omission of information [<xref ref-type="bibr" rid="ref27">27</xref>]: whether crucial information from the original report was overlooked.</p>
          </list-item>
          <list-item>
            <p>Hallucinations [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]: whether ChatGPT generated medical findings that were not present in the original report.</p>
          </list-item>
          <list-item>
            <p>Misinformation [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]: whether the output of ChatGPT quoted incorrect prior knowledge.</p>
          </list-item>
          <list-item>
            <p>Inconsistency [<xref ref-type="bibr" rid="ref30">30</xref>]: whether the reasoning result was inconsistent with the reasoning process.</p>
          </list-item>
        </list>
        <p>For diagnostic evaluation, precision, recall, and <italic>F</italic><sub>1</sub>-score were calculated based on ChatGPT’s responses and gold clinical diagnosis. Additionally, to evaluate the diagnostic performance of ChatGPT, 2 ophthalmologists and 2 ophthalmology interns with 2 years of clinical experience from SAHZU were invited to diagnose 100 FFA reports, which were randomly selected according to the proportion of diseases. In terms of the evaluation of ChatGPT’s inference ability, the last 5 measurements were evaluated on the responses to the Step-Chinese and Step-English prompts by 2 ophthalmologists from SAHZU. We randomly selected 509 FFA reports (no more than 100 for each disease) and the corresponding ChatGPT outputs for evaluation. Before the formal evaluation, the ophthalmologists were requested to complete annotation training and achieved a final interannotator agreement of up to 94%. The statistical analysis between the Chinese and English prompts was performed with the use of chi-square tests for categorical measurements and Student <italic>t</italic> tests or Wilcoxon rank-sum tests for continuous measurements, as appropriate. A 2-sided <italic>P</italic>&#60;.05 was considered statistically significant.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Characteristics of ChatGPT’s Responses</title>
        <p>The characteristics of the FFA reports and responses by ChatGPT are listed in <xref ref-type="table" rid="table1">Table 1</xref>. Direct-Chinese and Direct-English prompts received responses that directly provided the final options, and their mean (SD) lengths were 19.2 (4.4) tokens and 5.7 (1.7) tokens, respectively, while Step-Chinese and Step-English prompts received responses that provided the detailed reasoning process, and their mean (SD) lengths were 118.4 (71.8) tokens and 100.5 (36.9) tokens, respectively. Examples of different prompts and their responses are presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Characteristics of the FFA reports and ChatGPT’s responses (N=1226).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="90"/>
            <col width="190"/>
            <col width="180"/>
            <col width="170"/>
            <col width="130"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>Count, n</td>
                <td>Report length (tokens), mean (SD)</td>
                <td colspan="4">Response length (tokens), mean (SD)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Direct-Chinese</td>
                <td>Direct-English</td>
                <td>Step-Chinese</td>
                <td>Step-English</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Normal</td>
                <td>117</td>
                <td>10.5 (2.4)</td>
                <td>14.2 (2.0)</td>
                <td>5.4 (1.6)</td>
                <td>86.6 (52.7)</td>
                <td>64.6 (23.7)</td>
              </tr>
              <tr valign="top">
                <td>DR<sup>a</sup></td>
                <td>717</td>
                <td>46.4 (12.1)</td>
                <td>19.5 (5.0)</td>
                <td>5.7 (1.6)</td>
                <td>124.0 (81.2)</td>
                <td>100.9 (32.1)</td>
              </tr>
              <tr valign="top">
                <td>wetAMD<sup>b</sup></td>
                <td>183</td>
                <td>31.1 (11.2)</td>
                <td>20.5 (1.5)</td>
                <td>6.1 (1.6)</td>
                <td>108.9 (46.4)</td>
                <td>114.4 (44.7)</td>
              </tr>
              <tr valign="top">
                <td>CSC<sup>c</sup></td>
                <td>73</td>
                <td>29.9 (6.7)</td>
                <td>19.3 (2.7)</td>
                <td>6.3 (1.9)</td>
                <td>146.7 (78.7)</td>
                <td>127.4 (41.5)</td>
              </tr>
              <tr valign="top">
                <td>BRVO<sup>d</sup></td>
                <td>63</td>
                <td>44.7 (11.1)</td>
                <td>19.8 (2.0)</td>
                <td>5.2 (1.8)</td>
                <td>106.5 (23.9)</td>
                <td>87.2 (24.1)</td>
              </tr>
              <tr valign="top">
                <td>CRVO<sup>e</sup></td>
                <td>38</td>
                <td>50.6 (10.5)</td>
                <td>20.7 (3.2)</td>
                <td>4.8 (1.8)</td>
                <td>134.5 (52.1)</td>
                <td>91.4 (22.6)</td>
              </tr>
              <tr valign="top">
                <td>VKH<sup>f</sup></td>
                <td>35</td>
                <td>34.7 (13.5)</td>
                <td>19.9 (2.3)</td>
                <td>5.3 (1.4)</td>
                <td>105.4 (43.9)</td>
                <td>116.5 (41.4)</td>
              </tr>
              <tr valign="top">
                <td>Overall</td>
                <td>1226</td>
                <td>39.4 (15.9)</td>
                <td>19.2 (4.4)</td>
                <td>5.7 (1.7)</td>
                <td>118.4 (71.8)</td>
                <td>100.5 (36.9)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>DR: diabetic retinopathy.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>wetAMD: wet age-related macular degeneration.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>CSC: central serous chorioretinopathy.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>BRVO: branch retinal vein occlusion.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>CRVO: central retinal vein occlusion.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>VKH: Vogt-Koyanagi-Harada disease.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Diagnostic Performance</title>
        <p>The Direct-English prompts achieved an overall precision of 79.61%, recall of 83.12%, and <italic>F</italic><sub>1</sub>-score of 80.05%, which was 9.58 percentage points higher than that achieved by the Direct-Chinese prompts (<xref ref-type="table" rid="table2">Table 2</xref>). The diagnostic performance varied significantly for each disease category. ChatGPT performed better in the normal and DR categories, with the <italic>F</italic><sub>1</sub>-scores exceeding 80%, but performed worse in the VKH and CSC categories, achieving <italic>F</italic><sub>1</sub>-scores of less than 4%. Additionally, the <italic>F</italic><sub>1</sub>-score in the BRVO category varied greatly, from 54.35% for Direct-Chinese prompts to 74.51% for Direct-English prompts.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Diagnostic performance of ChatGPT across various disease categories on the FFA reports.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="70"/>
            <col width="80"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="0"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="0"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td colspan="3">Direct-Chinese (%)</td>
                <td colspan="4">Direct-English (%)</td>
                <td colspan="4">Step-Chinese (%)</td>
                <td colspan="3">Step-English (%)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>P<sup>a</sup></td>
                <td>R<sup>b</sup></td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>P</td>
                <td>R</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td colspan="2">P</td>
                <td>R</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td colspan="2">P</td>
                <td>R</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Normal</td>
                <td>100</td>
                <td>85.47</td>
                <td>92.17</td>
                <td>100</td>
                <td>88.03</td>
                <td>93.64</td>
                <td colspan="2">98.39</td>
                <td>52.14</td>
                <td>68.16</td>
                <td colspan="2">97.37</td>
                <td>94.87</td>
                <td>96.1</td>
              </tr>
              <tr valign="top">
                <td>DR<sup>c</sup></td>
                <td>91.55</td>
                <td>72.52</td>
                <td>80.93</td>
                <td>91.05</td>
                <td>95.12</td>
                <td>93.04</td>
                <td colspan="2">85.07</td>
                <td>95.4</td>
                <td>89.94</td>
                <td colspan="2">82.13</td>
                <td>93.58</td>
                <td>87.48</td>
              </tr>
              <tr valign="top">
                <td>wetAMD<sup>d</sup></td>
                <td>44.72</td>
                <td>87.98</td>
                <td>59.3</td>
                <td>59.92</td>
                <td>80.87</td>
                <td>68.84</td>
                <td colspan="2">63.58</td>
                <td>60.11</td>
                <td>61.8</td>
                <td colspan="2">60</td>
                <td>34.42</td>
                <td>43.75</td>
              </tr>
              <tr valign="top">
                <td>CSC<sup>e</sup></td>
                <td>4.35</td>
                <td>2.74</td>
                <td>3.36</td>
                <td>33.33</td>
                <td>1.37</td>
                <td>2.63</td>
                <td colspan="2">34.15</td>
                <td>19.18</td>
                <td>24.56</td>
                <td colspan="2">50</td>
                <td>6.85</td>
                <td>12.05</td>
              </tr>
              <tr valign="top">
                <td>BRVO<sup>f</sup></td>
                <td>41.32</td>
                <td>79.37</td>
                <td>54.35</td>
                <td>63.33</td>
                <td>90.47</td>
                <td>74.51</td>
                <td colspan="2">83.61</td>
                <td>80.95</td>
                <td>82.26</td>
                <td colspan="2">67.95</td>
                <td>84.13</td>
                <td>75.18</td>
              </tr>
              <tr valign="top">
                <td>CRVO<sup>g</sup></td>
                <td>93.1</td>
                <td>71.05</td>
                <td>80.6</td>
                <td>84.85</td>
                <td>73.68</td>
                <td>78.87</td>
                <td colspan="2">41.27</td>
                <td>68.42</td>
                <td>51.49</td>
                <td colspan="2">58.33</td>
                <td>73.68</td>
                <td>65.12</td>
              </tr>
              <tr valign="top">
                <td>VKH<sup>h</sup></td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td colspan="2">0</td>
                <td>0</td>
                <td>0</td>
                <td colspan="2">0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Overall</td>
                <td>75.03</td>
                <td>70.15</td>
                <td>70.47</td>
                <td>79.61</td>
                <td>83.12</td>
                <td>80.05</td>
                <td colspan="2">76.24</td>
                <td>77.16</td>
                <td>75.61</td>
                <td colspan="2">74.56</td>
                <td>75.94</td>
                <td>73.46</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>P: precision.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>R: recall.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>DR: diabetic retinopathy.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>wetAMD: wet age-related macular degeneration.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>CSC: central serous chorioretinopathy.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>BRVO: branch retinal vein occlusion.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>CRVO: central retinal vein occlusion.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>VKH: Vogt-Koyanagi-Harada disease.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In contrast, the Step-Chinese prompts achieved an overall precision of 76.24%, recall of 77.16%, and <italic>F</italic><sub>1</sub>-score of 75.61%, which was 2.15% higher than that achieved by ChatGPT for Step-English prompts. Compared with Direct-Chinese prompts, the <italic>F</italic><sub>1</sub>-score for Step-Chinese prompts was increased by 5.14% and provided the reasoning process, which is crucial for disease diagnosis. However, the diagnostic performance of Step-Chinese prompts in the normal and CRVO categories was far worse than that of Direct-Chinese prompts. This is mainly because, with Step-Chinese prompts, ChatGPT generated hallucinations for FFA reports in the normal category, which were wrongly diagnosed as CRVO. <xref rid="figure2" ref-type="fig">Figure 2</xref> further demonstrates the confusion matrices of ChatGPT for the 4 prompting strategies.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Confusion matrices of ChatGPT for the 4 prompting strategies. BRVO: branch retinal vein occlusion; CSC: central serous chorioretinopathy; CRVO: central retinal vein occlusion; DR: diabetic retinopathy; Undiag: undiagnosed; VKH: Vogt-Koyanagi-Harada disease; wetAMD: wet age-related macular degeneration.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e51926_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows the average <italic>F</italic><sub>1</sub>-score of ophthalmologists, ophthalmology interns, ChatGPT with English prompts (Direct-English and Step-English), and ChatGPT with Chinese prompts (Direct-Chinese and Step-Chinese). Although ChatGPT performed better than experts for some disease types (eg, Direct-English and Step-English prompts for the normal and CRVO categories and all prompts for the BRVO category), the overall performance of ophthalmologists was the best (89.35%), followed by ophthalmology interns (82.69%), ChatGPT with Direct-English and Step-English prompts (76.76%), and ChatGPT with Direct-Chinese and Step-Chinese prompts (73.04%).</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Diagnostic performance of humans and ChatGPT. BRVO: branch retinal vein occlusion; CSC: central serous chorioretinopathy; CRVO: central retinal vein occlusion; DR: diabetic retinopathy; VKH: Vogt-Koyanagi-Harada disease; wetAMD: wet age-related macular degeneration.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e51926_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Inference Ability</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> presents the outcomes of ChatGPT’s inference ability, as evaluated by 2 ophthalmologists. The results of continuous measurements were presented descriptively as mean (SD) values. Based on the 509 FFA reports, Step-English prompts exhibited a tendency to require more reasoning steps for each report (<italic>P</italic>&#60;.001, Wilcoxon rank-sum test). Although the average number of inference errors was similar (0.4 per report) between Step-Chinese and Step-English prompts (<italic>P</italic>=.88, Wilcoxon rank-sum test), Step-English prompts brought less incompleteness (44.31%), hallucinations (0.59%), and misinformation (1.96%) (all <italic>P</italic>&#60;.001, Chi-square test). Instead, Step-Chinese prompts, which involved fewer reasoning steps, were more prone to having key information from the original report overlooked during the reasoning process, although this difference lacked statistical significance (<italic>P</italic>=.68, Chi-square test). In addition, a few generated diagnoses were marked as inconsistent with the reasoning process, with no statistical difference observed between Step-Chinese and Step-English prompts. Overall, compared with Step-Chinese prompts, ChatGPT demonstrated stronger inference abilities for Step-English prompts, particularly with regard to the average number of reasoning steps, incompleteness, hallucinations, and misinformation.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Inference ability of ChatGPT in the diagnosis of retinal vascular diseases.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="530"/>
            <col width="180"/>
            <col width="180"/>
            <col width="110"/>
            <thead>
              <tr valign="bottom">
                <td>Measurement</td>
                <td>Step-Chinese</td>
                <td>Step-English</td>
                <td><italic>P</italic> value<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Reasoning steps per report, mean (SD)</td>
                <td>1.4 (0.8)</td>
                <td>2.6 (1.5)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Reasoning errors per report, mean (SD)</td>
                <td>0.4 (0.5)</td>
                <td>0.4 (0.6)</td>
                <td>.88</td>
              </tr>
              <tr valign="top">
                <td>Incompleteness (%)</td>
                <td>63.53</td>
                <td>44.31</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Omission of information (%)</td>
                <td>0.78</td>
                <td>0.39</td>
                <td>.68</td>
              </tr>
              <tr valign="top">
                <td>Hallucinations (%)</td>
                <td>5.88</td>
                <td>0.59</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Misinformation (%)</td>
                <td>7.84</td>
                <td>1.96</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Inconsistency (%)</td>
                <td>0.59</td>
                <td>0.39</td>
                <td>&#62;.99</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Chi-square tests were used for categorical measurements and Wilcoxon rank-sum tests for continuous measurements.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Robustness</title>
        <p>Using different prompt strategies introduces some variability in ChatGPT’s responses to a given FFA report. Hence, we evaluated the robustness of ChatGPT with different language prompts by calculating the average diagnostic performance for 4 prompting methods: Direct, Step, Step (more detail), and Step (2-step) (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The precision, recall, and <italic>F</italic><sub>1</sub>-score, presented descriptively as mean (SD) values, were compared between Chinese and English prompts using Student <italic>t</italic> tests. As shown in <xref ref-type="table" rid="table4">Table 4</xref>, the results indicated significant differences in the recall (<italic>P</italic>=.03) and <italic>F</italic><sub>1</sub>-score (<italic>P</italic>=.04) between Chinese and English prompts, while no significant difference was observed in the precision (<italic>P</italic>=.18). The mean (SD) <italic>F</italic><sub>1</sub>-scores of ChatGPT with Chinese and English prompts were 70.02% (3.54%) and 76.47% (2.61%), respectively. In short, the diagnostic performance of ChatGPT with English prompts was better and more robust.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The robustness of ChatGPT with various prompts in Chinese and English.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="360"/>
            <col width="230"/>
            <col width="230"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Diagnostic performance (%), mean (SD)</td>
                <td>Chinese prompt</td>
                <td>English prompt</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Precision</td>
                <td>74.38 (1.49)</td>
                <td>76.64 (2.10)</td>
                <td>.18</td>
              </tr>
              <tr valign="top">
                <td>Recall</td>
                <td>68.78 (3.03)</td>
                <td>78.71 (4.46)</td>
                <td>.03</td>
              </tr>
              <tr valign="top">
                <td><italic>F</italic><sub>1</sub>-score</td>
                <td>70.02 (3.54)</td>
                <td>76.47 (2.61)</td>
                <td>.04</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>To the best of our knowledge, this is the first study to evaluate ChatGPT’s performance on non-English clinical text for retinal vascular disease diagnosis. We have developed a systematic evaluation scheme that encompasses objective diagnostic performance, professional inference abilities, and comparisons with the diagnostic ability of experts. Through large-scale experiments and analysis, we found the potential of LLMs as medical assistants to provide diagnosis, and identified challenges faced by ChatGPT in the field of health care, especially regarding language disparity.</p>
        <p>Our results demonstrated that ChatGPT can preliminarily diagnose retinal vascular diseases based on Chinese FFA reports and achieved a high <italic>F</italic><sub>1</sub>-score of 80.05% at best. However, the diagnostic performance of ChatGPT varied significantly among different diseases and prompting languages. The performance for common DR was significantly better than that for the more uncommon VKH, which is relatively low in incidence and more difficult to diagnose. Another noteworthy phenomenon is the language disparity of ChatGPT. Given that the development and validation of ChatGPT predominantly relied on English-centric data sets [<xref ref-type="bibr" rid="ref31">31</xref>] and that non-English medical corpora are even more scarce, ChatGPT exhibited a significant decline in diagnostic performance with Chinese prompts compared with English prompts, with a 6.45% decrease in <italic>F</italic><sub>1</sub>-score. This language disparity poses challenges for the effective application of ChatGPT in non-English clinical settings.</p>
        <p>Meanwhile, the diagnosis accompanied by reasoning steps did not necessarily lead to performance improvement; <italic>F</italic><sub>1</sub>-scores decreased by 6.59% for English prompts but increased by 5.14% for Chinese prompts. This disparity may be attributed to ChatGPT’s training being mainly on English corpora, with Direct-English prompts enabling a straightforward mapping from input to diagnosis. In contrast, Step-English prompts tended to bring more mistakes than benefits through multistep internal reasoning. However, for Chinese prompts, the scarcity of Chinese training data results in limited knowledge for disease diagnosis. Step-Chinese prompts, with the requirement of a reasoning process, can effectively compensate for incomplete and incorrect reasoning caused by limited knowledge, although they may introduce some noise. The performance gap between different diseases and prompting strategies demonstrates the potential unfairness brought by the overrepresentation of the major diseases, languages, and countries. This limitation hinders the global applicability of ChatGPT, particularly in non–English-speaking countries.</p>
        <p>From the perspective of clinical practice, ChatGPT’s diagnostic performance still did not reach the level of ophthalmologists or even ophthalmology interns. It is worth noting that ChatGPT may be conservative in disease diagnosis. Despite the instruction restriction (must identify one), certain responses involved multiple conditions or indicated an inability to conclude based on existing information. Notably, although ChatGPT can derive a reasoning process to improve clinical interpretability, ophthalmologists identified some harmful mistakes, such as generating medical findings not mentioned in the original reports and quoting incorrect prior knowledge. More in-depth investigation and careful regulation are required before applying ChatGPT in the health care domain. Also, it is imperative to incorporate more extensive and higher-quality clinical data sets and knowledge into ChatGPT [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>].</p>
      </sec>
      <sec>
        <title>Comparison to Prior Work</title>
        <p>Prior work in using artificial intelligence (AI) for the automated diagnosis of retinal vascular disease has yielded promising outcomes [<xref ref-type="bibr" rid="ref35">35</xref>]. However, since some hospitals struggle to produce qualified FFA reports [<xref ref-type="bibr" rid="ref36">36</xref>] and require ophthalmologists with extensive clinical experience or retinal specialists, the majority of these studies have predominantly focused on analyzing FFA images. Ryu et al [<xref ref-type="bibr" rid="ref37">37</xref>] introduced an end-to-end deep convolutional neural network–based method specifically designed for the automatic detection of DR and the assessment of lesion status. Similarly, Ding et al [<xref ref-type="bibr" rid="ref38">38</xref>] proposed a pipeline for detecting retinal vessels in FFA images using deep neural networks. Moreover, Li et al [<xref ref-type="bibr" rid="ref39">39</xref>] presented a weakly supervised learning-based method for detecting fluorescein leakage, eliminating the need for manual annotation of leakage areas. In contrast to research predominantly centered on lesion detection or specific disease diagnoses, Zhao et al [<xref ref-type="bibr" rid="ref40">40</xref>] developed an AI system capable of automating image phase identification, diagnosing 4 different types of retinal diseases, and segmenting ischemic areas using FFA images. In our study, we used ChatGPT with 4 different prompting strategies based on FFA reports to diagnose a series of retinal diseases. Notably, when using an English prompt for direct diagnosis, ChatGPT exhibited impressive performance in the classification of retinal vascular diseases, requiring no additional training.</p>
        <p>Beyond diagnostic accuracy, researchers have dedicated efforts to enhance the interpretability of disease diagnoses [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. The widely used method for this purpose is heatmap visualization [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>], used to accentuate characteristic regions crucial for disease diagnosis. This method may not capture the nuanced interplay of features critical for accurate diagnosis, leading to a potential loss of information and subtlety in the interpretative process. In this study, ChatGPT showed promise in enhancing the interpretability of disease diagnoses by explaining the process of diagnostic reasoning step by step. Its capacity to generate human-readable responses also allows for a more intuitive understanding of the AI diagnostic process.</p>
        <p>ChatGPT has been used for various applications in clinical care and research. While numerous studies have demonstrated promising outcomes in complex medical tasks, including the United States Medical Licensing Exam (USMLE) [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref43">43</xref>], simplifying imaging reports for patients [<xref ref-type="bibr" rid="ref27">27</xref>] and aiding decision-making [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>], it is crucial to note that ChatGPT exhibits certain limitations. In the execution of the aforementioned tasks, ChatGPT occasionally produces errors, such as hallucinations or incomplete information [<xref ref-type="bibr" rid="ref46">46</xref>]. However, the preceding studies were limited to the application and evaluation of ChatGPT solely within English medical contexts, neglecting an exploration of its effectiveness in non-English clinical scenarios. This study fills this gap by leveraging Chinese FFA reports to assess ChatGPT’s diagnostic performance and inference abilities for retinal vascular diseases in a non-English clinical environment and exploring the appropriate prompt languages and strategies.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study has several limitations. First, we did not fully utilize all the information available in clinical scenarios to conduct a diagnosis, such as more detailed FFA images, which may have reduced the diagnostic accuracy due to incomplete information. Since ChatGPT cannot analyze images, we will further evaluate the capabilities of multimodal models in subsequent research. Second, this study was not conducted in clinical practice. A prospective clinical trial can better examine an LLM’s clinical benefit; we leave this to our future work.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study conducted extensive experiments to evaluate the diagnostic capabilities of ChatGPT in retinal vascular diseases, including objective diagnostic performance and professional reasoning analysis evaluated by ophthalmologists. ChatGPT with English prompts for direct diagnosis performed best, achieving results close to the diagnostic performance of ophthalmology interns with 2 years of clinical experience. On the contrary, due to limited Chinese training data and knowledge, ChatGPT with Chinese prompts led to incomplete reasoning and poor diagnostic performance, which demonstrates that there is a significant language disparity in the application of ChatGPT in clinical environments. Additionally, although ChatGPT can derive a reasoning process with a low error rate, mistakes such as misinformation and hallucinations still exist, which will mislead the diagnosis of retinal vascular diseases. This study generally reveals the potential of LLMs to serve as a helpful medical assistant to provide diagnosis in non-English clinical environments, but also demonstrates the potential limitations and the need to continually explore more robust LLMs in ophthalmology practice.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Example of the input and output of ChatGPT with various prompts.</p>
        <media xlink:href="jmir_v26i1e51926_app1.docx" xlink:title="DOCX File, 26 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BRVO</term>
          <def>
            <p>branch retinal vein occlusion</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CRVO</term>
          <def>
            <p>central retinal vein occlusion</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DR</term>
          <def>
            <p>diabetic retinopathy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">FFA</term>
          <def>
            <p>fundus fluorescein angiography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LLMs</term>
          <def>
            <p>large language models</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SAHZU</term>
          <def>
            <p>The Second Affiliated Hospital of Zhejiang University</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">VKH</term>
          <def>
            <p>Vogt-Koyanagi-Harada disease</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>KJ was supported by Natural Science Foundation of China (grant 82201195). YW was supported by Medical Scientific Research Foundation of Zhejiang Province, China (grant 2022502730).</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>Data will be made available for research purposes upon request. Data requests are to be directed to KJ.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>XL and JW conducted the experiments and statistical analysis, drafted the work, and contributed equally to this work as co–first authors. J Yang is the senior author; he designed the study and drafted the work with XL and JW. AS and WS participated in the data extraction. AS, WS, PY, and YW participated in the expert evaluation. J Ye and KJ are corresponding authors; they provided clinical guidance and administrative, technical, and material support. All authors revised the manuscript and approved the submitted version.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>GBD 2019 Blindness and Vision Impairment Collaborators</collab>
            <collab>Vision Loss Expert Group of the Global Burden of Disease Study</collab>
          </person-group>
          <article-title>Trends in prevalence of blindness and distance and near vision impairment over 30 years: an analysis for the Global Burden of Disease Study</article-title>
          <source>Lancet Glob Health</source>
          <year>2021</year>
          <month>02</month>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>e130</fpage>
          <lpage>e143</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2214-109X(20)30425-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2214-109X(20)30425-3</pub-id>
          <pub-id pub-id-type="medline">33275950</pub-id>
          <pub-id pub-id-type="pii">S2214-109X(20)30425-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7820390</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gahlaut</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Suarez</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Uddin</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Jayagopal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Nanoengineering of therapeutics for retinal vascular disease</article-title>
          <source>Eur J Pharm Biopharm</source>
          <year>2015</year>
          <month>09</month>
          <volume>95</volume>
          <issue>Pt B</issue>
          <fpage>323</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26022642"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ejpb.2015.05.001</pub-id>
          <pub-id pub-id-type="medline">26022642</pub-id>
          <pub-id pub-id-type="pii">S0939-6411(15)00219-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4604030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marmoy</surname>
              <given-names>OR</given-names>
            </name>
            <name name-style="western">
              <surname>Henderson</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Ooi</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Recommended protocol for performing oral fundus fluorescein angiography (FFA) in children</article-title>
          <source>Eye (Lond)</source>
          <year>2022</year>
          <month>01</month>
          <volume>36</volume>
          <issue>1</issue>
          <fpage>234</fpage>
          <lpage>236</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33323986"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41433-020-01328-6</pub-id>
          <pub-id pub-id-type="medline">33323986</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41433-020-01328-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8727556</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Automatic interpretation and clinical evaluation for fundus fluorescein angiography images of diabetic retinopathy patients by deep learning</article-title>
          <source>Br J Ophthalmol</source>
          <year>2023</year>
          <month>11</month>
          <day>22</day>
          <volume>107</volume>
          <issue>12</issue>
          <fpage>1852</fpage>
          <lpage>1858</lpage>
          <pub-id pub-id-type="doi">10.1136/bjo-2022-321472</pub-id>
          <pub-id pub-id-type="medline">36171054</pub-id>
          <pub-id pub-id-type="pii">bjo-2022-321472</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Multi-label classification of retinal lesions in diabetic retinopathy for automatic analysis of fundus fluorescein angiography based on deep learning</article-title>
          <source>Graefes Arch Clin Exp Ophthalmol</source>
          <year>2020</year>
          <month>04</month>
          <volume>258</volume>
          <issue>4</issue>
          <fpage>779</fpage>
          <lpage>785</lpage>
          <pub-id pub-id-type="doi">10.1007/s00417-019-04575-w</pub-id>
          <pub-id pub-id-type="medline">31932886</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00417-019-04575-w</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Automatic detection of non-perfusion areas in diabetic macular edema from fundus fluorescein angiography for decision making using deep learning</article-title>
          <source>Sci Rep</source>
          <year>2020</year>
          <month>09</month>
          <day>15</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>15138</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-020-71622-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-020-71622-6</pub-id>
          <pub-id pub-id-type="medline">32934283</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-020-71622-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7492239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yip</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Automatic detection of leakage point in central serous chorioretinopathy of fundus fluorescein angiography based on time sequence deep learning</article-title>
          <source>Graefes Arch Clin Exp Ophthalmol</source>
          <year>2021</year>
          <month>08</month>
          <volume>259</volume>
          <issue>8</issue>
          <fpage>2401</fpage>
          <lpage>2411</lpage>
          <pub-id pub-id-type="doi">10.1007/s00417-021-05151-x</pub-id>
          <pub-id pub-id-type="medline">33846835</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00417-021-05151-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holomcik</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Seeböck</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gerendas</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Mylonas</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Najeeb</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt-Erfurth</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Deak</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Segmentation of macular neovascularization and leakage in fluorescein angiography images in neovascular age-related macular degeneration using deep learning</article-title>
          <source>Eye (Lond)</source>
          <year>2023</year>
          <month>05</month>
          <volume>37</volume>
          <issue>7</issue>
          <fpage>1439</fpage>
          <lpage>1444</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35778604"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41433-022-02156-6</pub-id>
          <pub-id pub-id-type="medline">35778604</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41433-022-02156-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC10169785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huck</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>DeepOpht: medical report generation for retinal images via deep models and visual explanation</article-title>
          <source>Proceedings of the 2021 IEEE Winter Conference on Applications of Computer Vision (WACV)</source>
          <year>2021</year>
          <conf-name>2021 IEEE Winter Conference on Applications of Computer Vision (WACV)</conf-name>
          <conf-date>January 3-8, 2021</conf-date>
          <conf-loc>Waikoloa, HI</conf-loc>
          <pub-id pub-id-type="doi">10.1109/wacv48630.2021.00249</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Verspoor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Cross-modal clinical graph Transformer for ophthalmic report generation</article-title>
          <source>Proceedings of the 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source>
          <year>2022</year>
          <conf-name>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>
          <conf-date>June 18-24, 2022</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <publisher-loc>USA</publisher-loc>
          <publisher-name>IEEE</publisher-name>
          <pub-id pub-id-type="doi">10.1109/cvpr52688.2022.02000</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <article-title>Introducing ChatGPT</article-title>
          <source>OpenAI</source>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/blog/chatgpt/">https://openai.com/blog/chatgpt/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>King</surname>
              <given-names>MR</given-names>
            </name>
          </person-group>
          <article-title>The future of AI in medicine: a perspective from a chatbot</article-title>
          <source>Ann Biomed Eng</source>
          <year>2023</year>
          <month>02</month>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>291</fpage>
          <lpage>295</lpage>
          <pub-id pub-id-type="doi">10.1007/s10439-022-03121-w</pub-id>
          <pub-id pub-id-type="medline">36572824</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10439-022-03121-w</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Qiang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Summary of ChatGPT-related research and perspective towards the future of large language models</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>04</month>
          <day>04</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2304.01852">https://arxiv.org/abs/2304.01852</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Janssen</surname>
              <given-names>BV</given-names>
            </name>
            <name name-style="western">
              <surname>Kazemier</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Besselink</surname>
              <given-names>MG</given-names>
            </name>
          </person-group>
          <article-title>The use of ChatGPT and other large language models in surgical science</article-title>
          <source>BJS Open</source>
          <year>2023</year>
          <month>03</month>
          <day>07</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>zrad032</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36960954"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bjsopen/zrad032</pub-id>
          <pub-id pub-id-type="medline">36960954</pub-id>
          <pub-id pub-id-type="pii">7085520</pub-id>
          <pub-id pub-id-type="pmcid">PMC10037421</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Patterson</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Wanderer</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Turer</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Sittig</surname>
              <given-names>DF</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Using AI-generated suggestions from ChatGPT to optimize clinical decision support</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2023</year>
          <month>06</month>
          <day>20</day>
          <volume>30</volume>
          <issue>7</issue>
          <fpage>1237</fpage>
          <lpage>1245</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37087108"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocad072</pub-id>
          <pub-id pub-id-type="medline">37087108</pub-id>
          <pub-id pub-id-type="pii">7136722</pub-id>
          <pub-id pub-id-type="pmcid">PMC10280357</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>LY</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>XC</given-names>
            </name>
            <name name-style="western">
              <surname>Nejatian</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Nasir-Moin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Abidin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Eaton</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Riina</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Laufer</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Punjabi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Miceli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>NC</given-names>
            </name>
            <name name-style="western">
              <surname>Orillac</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schnurman</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Livia</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kurland</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Neifert</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dastagirzada</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kondziolka</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>ATM</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Aphinyanaphongs</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Oermann</surname>
              <given-names>EK</given-names>
            </name>
          </person-group>
          <article-title>Health system-scale language models are all-purpose prediction engines</article-title>
          <source>Nature</source>
          <year>2023</year>
          <month>07</month>
          <volume>619</volume>
          <issue>7969</issue>
          <fpage>357</fpage>
          <lpage>362</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37286606"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41586-023-06160-y</pub-id>
          <pub-id pub-id-type="medline">37286606</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41586-023-06160-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC10338337</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kung</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Cheatham</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Medenilla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sillos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>De Leon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Elepaño</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Madriaga</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aggabao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz-Candido</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Maningo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models</article-title>
          <source>PLOS Digit Health</source>
          <year>2023</year>
          <month>02</month>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e0000198</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812645"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id>
          <pub-id pub-id-type="medline">36812645</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-22-00371</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kumah-Crystal</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mankowitz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Embi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>CU</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT and the clinical informatics board examination: the end of unproctored maintenance of certification?</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2023</year>
          <month>08</month>
          <day>18</day>
          <volume>30</volume>
          <issue>9</issue>
          <fpage>1558</fpage>
          <lpage>1560</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocad104</pub-id>
          <pub-id pub-id-type="medline">37335851</pub-id>
          <pub-id pub-id-type="pii">7202064</pub-id>
          <pub-id pub-id-type="pmcid">PMC10436139</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mihalache</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Popovic</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Muni</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Performance of an artificial intelligence chatbot in ophthalmic knowledge assessment</article-title>
          <source>JAMA Ophthalmol</source>
          <year>2023</year>
          <month>06</month>
          <day>01</day>
          <volume>141</volume>
          <issue>6</issue>
          <fpage>589</fpage>
          <lpage>597</lpage>
          <pub-id pub-id-type="doi">10.1001/jamaophthalmol.2023.1144</pub-id>
          <pub-id pub-id-type="medline">37103928</pub-id>
          <pub-id pub-id-type="pii">2804364</pub-id>
          <pub-id pub-id-type="pmcid">PMC10141269</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarraju</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bruemmer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Iterson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Laffin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Appropriateness of cardiovascular disease prevention recommendations obtained from a popular online chat-based artificial intelligence model</article-title>
          <source>JAMA</source>
          <year>2023</year>
          <month>03</month>
          <day>14</day>
          <volume>329</volume>
          <issue>10</issue>
          <fpage>842</fpage>
          <lpage>844</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36735264"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2023.1044</pub-id>
          <pub-id pub-id-type="medline">36735264</pub-id>
          <pub-id pub-id-type="pii">2801244</pub-id>
          <pub-id pub-id-type="pmcid">PMC10015303</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>VD</given-names>
            </name>
            <name name-style="western">
              <surname>Ngo</surname>
              <given-names>NT</given-names>
            </name>
            <name name-style="western">
              <surname>Veyseh</surname>
              <given-names>APB</given-names>
            </name>
            <name name-style="western">
              <surname>Man</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dernoncourt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bui</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TH</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT beyond English: towards a comprehensive evaluation of large language models in multilingual learning</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>04</month>
          <day>12</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2304.05613">https://arxiv.org/abs/2304.05613</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Should ChatGPT be biased? Challenges and risks of bias in large language models</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>04</month>
          <day>07</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2304.03738">https://arxiv.org/abs/2304.03738</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Schuurmans</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bosma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ichter</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Chain-of-thought prompting elicits reasoning in large language models</article-title>
          <year>2022</year>
          <conf-name>36th Conference on Neural Information Processing Systems (NeurIPS 2022)</conf-name>
          <conf-date>November 28-December 9, 2022</conf-date>
          <conf-loc>New Orleans, Louisiana</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sallam</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns</article-title>
          <source>Healthcare (Basel)</source>
          <year>2023</year>
          <month>03</month>
          <day>19</day>
          <volume>11</volume>
          <issue>6</issue>
          <fpage>887</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=healthcare11060887"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/healthcare11060887</pub-id>
          <pub-id pub-id-type="medline">36981544</pub-id>
          <pub-id pub-id-type="pii">healthcare11060887</pub-id>
          <pub-id pub-id-type="pmcid">PMC10048148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cahyawijaya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wilie</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lovenia</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Do</surname>
              <given-names>QV</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A multitask, multilingual, multimodal evaluation of ChatGPT on reasoning, hallucination, and interactivity</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>02</month>
          <day>08</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2302.04023">http://arxiv.org/abs/2302.04023</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ning</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Teng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the logical reasoning ability of ChatGPT and GPT-4</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>04</month>
          <day>07</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2304.03439">http://arxiv.org/abs/2304.03439</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jeblick</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schachtner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Dexl</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mittermeier</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stüber</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Topalis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wesp</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sabel</surname>
              <given-names>BO</given-names>
            </name>
            <name name-style="western">
              <surname>Ricke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ingrisch</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT makes medicine easy to swallow: an exploratory case study on simplified radiology reports</article-title>
          <source>Eur Radiol</source>
          <year>2023</year>
          <month>10</month>
          <day>05</day>
          <volume>1</volume>
          <fpage>1</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1007/s00330-023-10213-1</pub-id>
          <pub-id pub-id-type="medline">37794249</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00330-023-10213-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Frieske</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ishii</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Madotto</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Survey of hallucination in natural language generation</article-title>
          <source>arXiv</source>
          <year>2022</year>
          <month>02</month>
          <day>08</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2202.03629">https://arxiv.org/abs/2202.03629</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zuccon</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Koopman</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Dr ChatGPT, tell me what I want to hear: how prompt knowledge impacts health answer correctness</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>02</month>
          <day>23</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2302.13793">http://arxiv.org/abs/2302.13793</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Borji</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A categorical archive of ChatGPT failures</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>02</month>
          <day>06</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2302.03494">http://arxiv.org/abs/2302.03494</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blevins</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Language contamination helps explain the cross-lingual capabilities of English pretrained models</article-title>
          <source>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2022</year>
          <conf-name>2022 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>December 7-11, 2022</conf-date>
          <conf-loc>Abu Dhabi, UAE</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2022.emnlp-main.233</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Qualifying Chinese medical licensing examination with knowledge enhanced generative pre-training model</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>05</month>
          <day>17</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2305.10163">http://arxiv.org/abs/2305.10163</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singhal</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Azizi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mahdavi</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Scales</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tanwani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cole-Lewis</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pfohl</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Payne</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Seneviratne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gamble</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Babiker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schärli</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chowdhery</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mansfield</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Agüera Y Arcas</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Webster</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Matias</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gottweis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tomasev</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rajkomar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barral</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Semturs</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikesalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Natarajan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Large language models encode clinical knowledge</article-title>
          <source>Nature</source>
          <year>2023</year>
          <month>08</month>
          <volume>620</volume>
          <issue>7972</issue>
          <fpage>172</fpage>
          <lpage>180</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37438534"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41586-023-06291-2</pub-id>
          <pub-id pub-id-type="medline">37438534</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41586-023-06291-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC10396962</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>PourNejatian</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Magoc</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lipori</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Ospina</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Hogan</surname>
              <given-names>WR</given-names>
            </name>
            <name name-style="western">
              <surname>Shenkman</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A study of generative large language model for medical research and healthcare</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <month>11</month>
          <day>16</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>210</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-023-00958-w"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00958-w</pub-id>
          <pub-id pub-id-type="medline">37973919</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00958-w</pub-id>
          <pub-id pub-id-type="pmcid">PMC10654385</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence and deep learning in ophthalmology: current status and future perspectives</article-title>
          <source>Adv Ophthalmol Pract Res</source>
          <year>2022</year>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>100078</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2667-3762(22)00055-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.aopr.2022.100078</pub-id>
          <pub-id pub-id-type="medline">37846285</pub-id>
          <pub-id pub-id-type="pii">S2667-3762(22)00055-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC10577833</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Preventing corneal blindness caused by keratitis using artificial intelligence</article-title>
          <source>Nat Commun</source>
          <year>2021</year>
          <month>06</month>
          <day>18</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>3738</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-021-24116-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-021-24116-6</pub-id>
          <pub-id pub-id-type="medline">34145294</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-021-24116-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8213803</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Sagong</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A deep learning model for identifying diabetic retinopathy using optical coherence tomography angiography</article-title>
          <source>Sci Rep</source>
          <year>2021</year>
          <month>11</month>
          <day>26</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>23024</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-021-02479-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-021-02479-6</pub-id>
          <pub-id pub-id-type="medline">34837030</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-021-02479-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8626435</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bawany</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Kuriyan</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Ramchandran</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Wykoff</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A novel deep learning pipeline for retinal vessel detection in fluorescein angiography</article-title>
          <source>IEEE Trans Image Process</source>
          <year>2020</year>
          <month>05</month>
          <day>08</day>
          <fpage>6561</fpage>
          <lpage>6573</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32396087"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/TIP.2020.2991530</pub-id>
          <pub-id pub-id-type="medline">32396087</pub-id>
          <pub-id pub-id-type="pmcid">PMC7648732</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A weakly supervised deep learning approach for leakage detection in fluorescein angiography images</article-title>
          <source>Transl Vis Sci Technol</source>
          <year>2022</year>
          <month>03</month>
          <day>02</day>
          <volume>11</volume>
          <issue>3</issue>
          <fpage>9</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35262648"/>
          </comment>
          <pub-id pub-id-type="doi">10.1167/tvst.11.3.9</pub-id>
          <pub-id pub-id-type="medline">35262648</pub-id>
          <pub-id pub-id-type="pii">2778640</pub-id>
          <pub-id pub-id-type="pmcid">PMC8934548</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tsui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>An artificial intelligence system for the whole process from diagnosis to treatment suggestion of ischemic retinal diseases</article-title>
          <source>Cell Rep Med</source>
          <year>2023</year>
          <month>10</month>
          <day>17</day>
          <volume>4</volume>
          <issue>10</issue>
          <fpage>101197</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2666-3791(23)00364-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.xcrm.2023.101197</pub-id>
          <pub-id pub-id-type="medline">37734379</pub-id>
          <pub-id pub-id-type="pii">S2666-3791(23)00364-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC10591037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chetoui</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Akhloufi</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Explainable end-to-end deep learning for diabetic retinopathy detection across multiple datasets</article-title>
          <source>J Med Imaging (Bellingham)</source>
          <year>2020</year>
          <month>07</month>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>044503</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32904519"/>
          </comment>
          <pub-id pub-id-type="doi">10.1117/1.JMI.7.4.044503</pub-id>
          <pub-id pub-id-type="medline">32904519</pub-id>
          <pub-id pub-id-type="pii">19327RR</pub-id>
          <pub-id pub-id-type="pmcid">PMC7456641</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Long</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sheng</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>A deep learning system for detecting diabetic retinopathy across the disease spectrum</article-title>
          <source>Nat Commun</source>
          <year>2021</year>
          <month>05</month>
          <day>28</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>3242</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-021-23458-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-021-23458-5</pub-id>
          <pub-id pub-id-type="medline">34050158</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-021-23458-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC8163820</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sorin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vaid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Soroush</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Charney</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Klang</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Comparing ChatGPT and GPT-4 performance in USMLE soft skill assessments</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <month>10</month>
          <day>01</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>16492</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-023-43436-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-023-43436-9</pub-id>
          <pub-id pub-id-type="medline">37779171</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-43436-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC10543445</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sorin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Klang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sklair-Levy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zippel</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Balint Lahat</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Konen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Barash</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Large language model (ChatGPT) as a support tool for breast tumor board</article-title>
          <source>NPJ Breast Cancer</source>
          <year>2023</year>
          <month>05</month>
          <day>30</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>44</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41523-023-00557-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41523-023-00557-8</pub-id>
          <pub-id pub-id-type="medline">37253791</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41523-023-00557-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10229606</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Truhn</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bressem</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kather</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Kuhl</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nebelung</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A pilot study on the efficacy of GPT-4 in providing orthopedic treatment recommendations from MRI reports</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <month>11</month>
          <day>17</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>20159</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-023-47500-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-023-47500-2</pub-id>
          <pub-id pub-id-type="medline">37978240</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-47500-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC10656559</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Luu</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Bi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Siren's song in the AI ocean: a survey on hallucination in large language models</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>09</month>
          <day>03</day>
          <access-date>2023-08-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2309.01219">http://arxiv.org/abs/2309.01219</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
