<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e60063</article-id>
      <article-id pub-id-type="pmid">39661433</article-id>
      <article-id pub-id-type="doi">10.2196/60063</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>EyeGPT for Patient Inquiries and Medical Education: Development and Validation of an Ophthalmology Large Language Model</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Coristine</surname>
            <given-names>Andrew</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jin</surname>
            <given-names>Kai</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chee</surname>
            <given-names>Marcel Lucas</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Rojas-Carabali</surname>
            <given-names>William</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Saxena</surname>
            <given-names>Amit</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Scherer</surname>
            <given-names>Rafael</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Xiaolan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1581-5045</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Ziwei</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-4551-348X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Weiyi</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-2780-9121</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Pusheng</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3195-4822</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Yue</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-8283-3854</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Mingpu</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0052-0837</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Gao</surname>
            <given-names>Le</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-7494-1315</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Yinwen</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4254-0972</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Shang</surname>
            <given-names>Xianwen</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2362-3222</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Shi</surname>
            <given-names>Danli</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>School of Optometry</institution>
            <institution>The Hong Kong Polytechnic University</institution>
            <addr-line>11 Yuk Choi Road</addr-line>
            <addr-line>Hung Hom, KLN</addr-line>
            <addr-line>Hong Kong, 999077</addr-line>
            <country>China</country>
            <phone>852 27664825</phone>
            <email>danli.shi@polyu.edu.hk</email>
          </address>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6094-137X</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>He</surname>
            <given-names>Mingguang</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6912-2810</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Optometry</institution>
        <institution>The Hong Kong Polytechnic University</institution>
        <addr-line>Hong Kong</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>State Key Laboratory of Ophthalmology</institution>
        <institution>Zhongshan Ophthalmic Center, Sun Yat-sen University</institution>
        <institution>Guangdong Provincial Key Laboratory of Ophthalmology and Visual Science, Guangdong Provincial Clinical Research Center for Ocular Diseases</institution>
        <addr-line>Guangzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Ophthalmology</institution>
        <institution>Shanghai General Hospital (Shanghai First People’s Hospital), School of Medicine</institution>
        <institution>Shanghai Jiao Tong University</institution>
        <addr-line>Shanghai</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>National Clinical Research Center for Eye Diseases</institution>
        <addr-line>Shanghai</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Research Centre for SHARP Vision (RCSV)</institution>
        <institution>The Hong Kong Polytechnic University</institution>
        <addr-line>Hong Kong</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Centre for Eye and Vision Research (CEVR)</institution>
        <institution>17W Hong Kong Science Park</institution>
        <addr-line>Hong Kong</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Danli Shi <email>danli.shi@polyu.edu.hk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>11</day>
        <month>12</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e60063</elocation-id>
      <history>
        <date date-type="received">
          <day>1</day>
          <month>5</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>22</day>
          <month>8</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>1</day>
          <month>10</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>11</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Xiaolan Chen, Ziwei Zhao, Weiyi Zhang, Pusheng Xu, Yue Wu, Mingpu Xu, Le Gao, Yinwen Li, Xianwen Shang, Danli Shi, Mingguang He. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 11.12.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e60063" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Large language models (LLMs) have the potential to enhance clinical flow and improve medical education, but they encounter challenges related to specialized knowledge in ophthalmology.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to enhance ophthalmic knowledge by refining a general LLM into an ophthalmology-specialized assistant for patient inquiries and medical education.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We transformed Llama2 into an ophthalmology-specialized LLM, termed EyeGPT, through the following 3 strategies: prompt engineering for role-playing, fine-tuning with publicly available data sets filtered for eye-specific terminology (83,919 samples), and retrieval-augmented generation leveraging a medical database and 14 ophthalmology textbooks. The efficacy of various EyeGPT variants was evaluated by 4 board-certified ophthalmologists through comprehensive use of 120 diverse category questions in both simple and complex question-answering scenarios. The performance of the best EyeGPT model was then compared with that of the unassisted human physician group and the EyeGPT+human group. We proposed 4 metrics for assessment: accuracy, understandability, trustworthiness, and empathy. The proportion of hallucinations was also reported.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The best fine-tuned model significantly outperformed the original Llama2 model at providing informed advice (mean 9.30, SD 4.42 vs mean 13.79, SD 5.70; <italic>P</italic>&lt;.001) and mitigating hallucinations (97/120, 80.8% vs 53/120, 44.2%, <italic>P</italic>&lt;.001). Incorporating information retrieval from reliable sources, particularly ophthalmology textbooks, further improved the model's response compared with solely the best fine-tuned model (mean 13.08, SD 5.43 vs mean 15.14, SD 4.64; <italic>P</italic>=.001) and reduced hallucinations (71/120, 59.2% vs 57/120, 47.4%, <italic>P</italic>=.02). Subgroup analysis revealed that EyeGPT showed robustness across common diseases, with consistent performance across different users and domains. Among the variants, the model integrating fine-tuning and book retrieval ranked highest, closely followed by the combination of fine-tuning and the manual database, standalone fine-tuning, and pure role-playing methods. EyeGPT demonstrated competitive capabilities in understandability and empathy when compared with human ophthalmologists. With the assistance of EyeGPT, the performance of the ophthalmologist was notably enhanced.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We pioneered and introduced EyeGPT by refining a general domain LLM and conducted a comprehensive comparison and evaluation of different strategies to develop an ophthalmology-specific assistant. Our results highlight EyeGPT’s potential to assist ophthalmologists and patients in medical settings.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>large language model</kwd>
        <kwd>generative pretrained transformer</kwd>
        <kwd>generative artificial intelligence</kwd>
        <kwd>ophthalmology</kwd>
        <kwd>retrieval-augmented generation</kwd>
        <kwd>medical assistant</kwd>
        <kwd>EyeGPT</kwd>
        <kwd>generative AI</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Ophthalmic diseases pose significant concerns for public health [<xref ref-type="bibr" rid="ref1">1</xref>]. However, shortages of professionals and inefficiencies in primary eye care systems often funnel patients into overcrowded tertiary centers. This results in extended wait times and unaddressed postconsultation questions, frequently requiring additional face-to-face appointments [<xref ref-type="bibr" rid="ref2">2</xref>]. These challenges can be attributed to the limited ophthalmic knowledge among patients and the limited experience in eye care among primary health care providers [<xref ref-type="bibr" rid="ref3">3</xref>]. Therefore, there is a pressing need to enhance ophthalmic health education for both patients and primary health care providers. However, relying solely on manpower to address these issues presents further challenges, particularly as the rate of population aging continues to outpace the growth rate of ophthalmologists.</p>
      <p>Large language models (LLMs) have recently emerged as powerful tools to alleviate these burdens and streamline clinical flow with the capability of understanding and generating human-like text [<xref ref-type="bibr" rid="ref4">4</xref>]. In ophthalmology, LLMs show promise both for ophthalmic certification exams [<xref ref-type="bibr" rid="ref5">5</xref>] and interpreting imaging reports across various linguistic environments [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. However, there are several limitations to existing LLMs. First, there are challenges with addressing specialized ophthalmology knowledge for general LLMs. Previous research has demonstrated the suboptimal performance of ChatGPT in ophthalmology, with only 15.4% of the responses graded as completely accurate in vitreoretinal disease [<xref ref-type="bibr" rid="ref8">8</xref>]. Even with GPT-4, which currently exhibits the greatest capability, nonnegligible instances of misinformation occur, with only 30.6%, 21.5%, and 55.6% of responses about ocular multimodal images considered accurate, highly usable, and harmless, respectively [<xref ref-type="bibr" rid="ref9">9</xref>]. A critical factor underlying these shortcomings is the model’s insufficient grasp of specialized knowledge, particularly in handling medical abbreviations and jargon within highly specialized domains [<xref ref-type="bibr" rid="ref5">5</xref>]. Therefore, there is a need to design a dedicated model trained on clinically relevant domain data. Second, it is widely recognized that LLMs occasionally generate inaccurate and misleading statements (hallucinations), which can potentially lead to medical errors. Fine-tuning with professional data can somewhat mitigate hallucinations, but the model can still produce them when faced with unfamiliar input [<xref ref-type="bibr" rid="ref10">10</xref>]. Therefore, additional solutions are required. 
Third, there is a noticeable absence of comprehensive evaluations for LLMs in ophthalmology. Although previous studies have explored the ophthalmic question-answering (QA) capabilities of LLMs, the majority have been limited to multiple-choice formats [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Although a few studies have used open-ended questions to evaluate the performance of LLMs, they lack detailed categorization of the questions and primarily focus on scattered aspects such as accuracy, comprehensiveness, or safety [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Consequently, a comprehensive evaluation framework is urgently needed to test ophthalmology-related LLMs and compare their responses with those provided by certified ophthalmologists.</p>
      <p>Recognizing this, we aimed to develop an artificial intelligence (AI) assistant, namely EyeGPT, to meet the specific informational needs in ophthalmic clinical and educational scenarios. By leveraging Llama2, a flexible and scalable open-source LLM known for its impressive performance in medicine [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>], we infused the model with a granular level of ophthalmic expertise through role-playing, fine-tuning, and retrieval-augmented generation (RAG). The resultant model, EyeGPT, was evaluated for its efficacy in patient consultations and medical education. This work provides valuable insights into building and evaluating ophthalmic assistants, paving the way for the next generation of AI-assisted ophthalmic practice.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study overview is presented in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Our research protocol adhered to the principles of the Helsinki Declaration. The study was approved by the Institutional Review Board of the Hong Kong Polytechnic University (number: HSEARS20240202004). This research involves publicly available data. We ensured that the data were deidentified and all private information was removed. Informed consent was unnecessary as the publicly available data do not contain identifiable information.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of this study. GPT: generative pre-trained transformer; MCQA: multiple-choice question answering; RAG: retrieval-augmented generation; USMLE: United States Medical Licensing Examination.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e60063_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Development of EyeGPT</title>
        <sec>
          <title>Base Model</title>
          <p>We used Meta’s Llama2 as the base model in our study, which was trained on 2 trillion tokens from publicly accessible data [<xref ref-type="bibr" rid="ref19">19</xref>]. We used the Llama2-7b-chat model, which was additionally fine-tuned on publicly available instruction data sets and over 1 million human annotations, thus having basic conversation skills [<xref ref-type="bibr" rid="ref20">20</xref>]. To inject professional ophthalmic knowledge into the model, we did experiments successively under the scenarios described in the following paragraphs.</p>
        </sec>
        <sec>
          <title>Role-Playing</title>
          <p>In generative AI, the engineering technique known as “role-playing” involves directing LLMs to “embody” or “imitate” specific roles for improved results [<xref ref-type="bibr" rid="ref21">21</xref>]. To enable the LLM to generate more relevant and empathetic responses, we assigned it the role of an “ophthalmologist” and the user the dual roles of a “patient” and “medical student.” This was achieved by giving the following instructions: “Suppose you are an ophthalmologist, and you need to answer the patient’s question with care/student’s question with patience.”</p>
        </sec>
        <sec>
          <title>Fine-Tuning</title>
          <p>To inject domain-specific knowledge and make Llama2 more proficient in capturing ophthalmic terminologies and logical reasoning, we trained it on domain-specific data sets, including MedAlpaca [<xref ref-type="bibr" rid="ref22">22</xref>], GenMedGPT-HealthCareMagic [<xref ref-type="bibr" rid="ref23">23</xref>], MedMCQA [<xref ref-type="bibr" rid="ref24">24</xref>], and the United States Medical Licensing Examination (USMLE). Processing of the USMLE data followed the method proposed by Jin et al [<xref ref-type="bibr" rid="ref25">25</xref>]. The data sets were filtered to remove conversations of little practical significance and responses with errors. We used instruction tuning [<xref ref-type="bibr" rid="ref26">26</xref>] to align the model with task-specific user objectives, enhance model controllability, and ensure rapid domain-specific adaptation. For data sets initially designed for multiple-choice QA, we automatically added an instruction at the beginning: “Answer the multiple-choice question.” For our specific task, we filtered out nonophthalmology data with eye-related keywords. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> presents the characteristics of the filtered data sets, and <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> lists the keywords we used.</p>
          <p>The final data set comprised 83,919 samples, with 81,919 used for training and 2000 used for validation. We used low-rank adaptation (LoRA) [<xref ref-type="bibr" rid="ref27">27</xref>] to fine-tune the Llama2-7B model by adding a low-rank matrix while keeping the original parameters frozen, aiming to complement the original weight matrices of the model. The models were fine-tuned using 3*V100 GPUs with a batch size of 24, learning rate of 0.00003, maximum sequence length of 512 tokens, and warm-up ratio of 0.03. For LoRA-specific hyperparameters, the rank of low-rank factorization was 8, the scaling factor for the rank was 16, and the dropout was 0.05. Specifically, we performed 3 types of fine-tuning: Fine-tune 1 (2000 iterations), Fine-tune 2 (3500 iterations), and Fine-tune 3 (10,000 iterations). The entire training process took approximately 11 hours to complete.</p>
        </sec>
        <sec>
          <title>Retrieval-Augmented Generation</title>
          <p>LLMs may produce potentially inaccurate responses (hallucinations) to questions [<xref ref-type="bibr" rid="ref28">28</xref>], which is unacceptable in the medical field. However, the accuracy of these models could be significantly improved if they could generate responses based on a reliable knowledge database. Here, to further improve the performance of EyeGPT, we introduced the external knowledge corpus of medical books and a manual database.</p>
          <p>For the medical books, we used 14 specialized ophthalmology textbooks that cover a wide range of comprehensive ophthalmic knowledge, including general ophthalmology, optometry, retinal diseases, and more [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. Please refer to <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> for the specific textbook list.</p>
          <p>We manually built a database (sample shown in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>) containing information on diseases, symptoms, medical tests and treatment procedures, and potential medications. This database, sourced from the open-access web and research papers, serves as an external and offline knowledge corpus for EyeGPT. It can be continually updated without model retraining and may provide more up-to-date information than textbooks.</p>
          <p>To leverage external knowledge, we adopted the LangChain framework’s information retrieval techniques. The “all-MiniLM-L6-v2” [<xref ref-type="bibr" rid="ref32">32</xref>] open-source embedding model was used to map text into vector space. We used the “RecursiveCharacterTextSplitter” [<xref ref-type="bibr" rid="ref33">33</xref>] to segment the text for efficient retrieval, with a chunk size set to 1024 characters. Roughly 2 segments are retrieved from the vector storage for each response. In addition, we constructed a retriever with Facebook AI Similarity Search (FAISS) [<xref ref-type="bibr" rid="ref34">34</xref>] based on the segmented documents and established a conversational retrieval chain that seamlessly integrated our EyeGPT with the external database through LangChain.</p>
        </sec>
      </sec>
      <sec>
        <title>Evaluation</title>
        <sec>
          <title>Overview of the Evaluation</title>
          <p>To assess the professional performance of various EyeGPT variants, namely (1) original (Llama2), (2) role-play (original plus role-play), (3) fine-tune 1-3 (fine-tuned model versions 1-3 plus role-play), (4) role-play+book (role-play plus book retrieval), (5) role-play+database (role-play plus manual database retrieval), (6) best fine-tune+book (the best fine-tuned model plus book retrieval), (7) best fine-tune+database (the best fine-tuned model plus manual database retrieval), our ophthalmology expert panel curated a set of 120 ophthalmic care-related questions based on their clinical expertise. We followed the user-centered evaluation approach proposed by Abbasian et al [<xref ref-type="bibr" rid="ref35">35</xref>], considering the following 3 key factors: disease type, character type, and domain type. Disease type covered a wide range of medical conditions from various subspecialties, including common, specialty, and rare diseases, resulting in 12 disease categories such as myopia, retinal detachment, and Stickler syndrome (refer to <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> for the detailed disease list). Character types included patients and medical students representing potential EyeGPT users. Domain types were divided into 5 topics: disease description, risk factors, diagnosis, treatment and prevention, and prognosis. We conducted the evaluations manually, including an independent evaluation of different EyeGPT variants, best-ranked comparisons for evaluating human-machine performance, and error analysis of the machine.</p>
        </sec>
        <sec>
          <title>Independent Evaluation</title>
          <p>This evaluation was designed to compare the performance of various optimization strategies of the EyeGPT variants and identify the best-performing one. Two board-certified ophthalmologists independently conducted manual assessment using a 5-point scale to assess the responses of each variant. The evaluation focused on the following 4 aspects: accuracy, understandability, trustworthiness, and empathy [<xref ref-type="bibr" rid="ref35">35</xref>]. The detailed grading scale is presented in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>. The scale ranged from 1 (strongly disagree) to 5 (strongly agree), with the average score from the 2 evaluators recorded as the score for each response aspect. The maximum score for each aspect was 5, and these scores were summed to obtain the final score for each response, with a maximum possible score of 20.</p>
          <p>To evaluate the effectiveness of different optimization strategies in mitigating hallucinations, we defined answers with accuracy scores below 4 as containing hallucinations in our study. To ensure the evaluators could not identify the source of the responses, all generated responses were formatted as plain text, concealing any model-specific features. These responses were then randomly shuffled and mixed before being presented to the evaluators.</p>
          <p>The evaluation was conducted in 2 rounds with a 1-month washout period to mitigate residual effects [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. In the first round, we compared models using different fine-tuning approaches, including original, role-play, and fine-tune 1-3. The goal was to determine the best fine-tuning model for the subsequent RAG. In the second round, we compared models using different RAG strategies based on the best-performing fine-tuned model selected from the first round. These models included best fine-tune (the best fine-tuned model from round 1), role-play+database, best fine-tune+database, role-play+book, and best fine-tune+book.</p>
        </sec>
        <sec>
          <title>Best-Ranked Comparison</title>
          <p>After independently evaluating the different EyeGPT variants, we identified the best-performing system. To assess if EyeGPT can match ophthalmologists’ expertise and offer them assistance, we conducted a human-machine best-ranked comparison. This evaluation method, inspired by that of Tu et al [<xref ref-type="bibr" rid="ref38">38</xref>], aimed to efficiently assess answers comprehensively, reducing the need for assessors to delve into every detail and thereby minimizing subjectivity.</p>
          <p>We invited 2 junior ophthalmologists (with 1-3 years of clinical experience) to answer the 120 questions with and without the aid of EyeGPT. The answers from different groups (EyeGPT, unassisted ophthalmologist, and EyeGPT+ophthalmologist) were evaluated by 2 senior ophthalmologists (with over 3 years of clinical experience) who were unaware of the sources, and the presentation order was randomized. Raters were asked to rank the 3 answers based on their clinical judgment across 4 dimensions, without the option of declaring a tie. In cases of disagreement, an ophthalmology expert (with over 10 years of clinical experience) reviewed the case until consensus was reached. The final result was recorded as the proportion of responses from different sources ranked as the best.</p>
        </sec>
        <sec>
          <title>Error Analysis</title>
          <p>To further investigate the quality of EyeGPT answers and identify areas for improvement, we conducted an error analysis on the best-performing EyeGPT model. The quality of the EyeGPT-generated QA pairs was evaluated by 2 board-certified ophthalmologists based on their expert judgment. The analysis focused on identifying occurrences of unrelated information, factual errors, incomplete information, and faulty logic [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
        </sec>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>Statistical analyses were conducted using R (Version 4.3.1). The Mann-Whitney <italic>U</italic> test was used to compare the scores of the 2 models in the independent evaluation. When creating the bar chart, we compared the performance of the base model (Llama 2 or best fine-tune) with the most competitive optimization model in the same round to display statistically significant differences on the chart. The score for each answer in the independent evaluation was based on the average score from 2 raters. The scoring criteria used in the bar chart were as follows: strongly disagree (1 to &lt;2), disagree (2 to &lt;3), neutral (3 to &lt;4), agree (4 to &lt;5), strongly agree (5). For subgroup analysis based on different confounding variables, the Kruskal-Wallis test and Mann-Whitney <italic>U</italic> test were used, depending on the number of comparison groups. Cohen kappa was calculated to determine the agreement among raters [<xref ref-type="bibr" rid="ref40">40</xref>]. <italic>P</italic> values &lt;.05 were considered statistically significant.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Comparative Study of Model Construction Strategies</title>
        <sec>
          <title>Overall Performance</title>
          <p>In the first round of evaluation, the total scores for the original, role-play, and fine-tune 1-3 models were 9.30, 12.79, 12.95, 12.83, and 13.79, respectively. All optimized models significantly outperformed the original model in accuracy, understandability, trustworthiness, and empathy, with fine-tune 3 performing the best. For the different fine-tuning variants, we observed that, as the number of iterations increased, the evaluation loss on the test data decreased (refer to <xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>) and the model performance improved. In the subsequent comparison of RAG strategies, the best fine-tune+book model scored the highest, at 15.14, outperforming other strategies, as elaborated in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>. To ensure reliability, we compared the scores of fine-tune 3 (named best fine-tune in round 2) across 2 rounds. We found no statistically significant difference between the scores of the 2 rounds (<italic>P</italic>=.11). Inter-rater reliability in 2 rounds of independent evaluation was confirmed, with kappa values ranging from 0.611 to 0.872, indicating substantial agreement among raters (<xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>). For illustrative examples of the varied grades of responses from the independent evaluation, see <xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>.</p>
          <p><xref rid="figure2" ref-type="fig">Figure 2</xref> demonstrates that more than one-half (accuracy: 67/120, 55.8%; understandability: 74/120, 61.7%; trustworthiness: 75/120, 62.5%; empathy: 74/120, 61.7%) of responses from the best fine-tune model were considered “good” responses (rated 4 or above) across all 4 dimensions. Compared with the original model (with an 80.8% [97/120] hallucination rate), the role-play and best fine-tune models mitigated hallucinations, by 30% (36/120) and 36.7% (44/120), respectively. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows that the best fine-tune+book model further enhanced the proportion of “good” responses to the maximum. We compared the performance of the best model in round 1 (fine-tune 3) with the most competitive modified model to check for statistically significant differences. The scores and scoring criteria are the same as in <xref rid="figure2" ref-type="fig">Figure 2</xref>. Compared with the best fine-tune model, the best fine-tune+database and best fine-tune+book models further reduced hallucinations by 3.3% (4/120) and 11.7% (14/120), respectively.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Performance in terms of (A) accuracy, (B) understandability, (C) trustworthiness, and (D) empathy of the different models in round 1 of the human evaluation, with the percentage of good responses (strongly agree and agree) indicated by the black numbers, the percentage of hallucinations indicated by the blue numbers, and significance determined using Mann-Whitney <italic>U</italic> tests.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e60063_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Performance in terms of (A) accuracy, (B) understandability, (C) trustworthiness, and (D) empathy of the different models in round 2 of the human evaluation, with the percentage of good responses (strongly agree and agree) indicated by the black numbers, the percentage of hallucinations indicated by the blue numbers, and significance determined using Mann-Whitney <italic>U</italic> tests.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e60063_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Subgroup Analysis</title>
          <p>We also performed subgroup analysis to further evaluate the model performance under different confounding factors, including subspecialty questions of varying difficulty levels, questions raised by different characters, and question domains.</p>
          <sec>
            <title>Different Subspecialties</title>
            <p>Across all RAG strategies, the models scored higher for common diseases than for specialty and rare conditions (<xref ref-type="table" rid="table1">Table 1</xref>). For common ophthalmic conditions, the RAG models delivered more precise and contextually relevant information. For more specialized conditions like central serous chorioretinopathy, the best fine-tune model provided general information about its treatment options, while the RAG models offered more specialized responses concerning laser treatment and photodynamic therapy depending on the specific circumstances. For rare conditions like morning glory syndrome, although the best fine-tune model could not generate responses as it mistakenly identified it as “bilateral posterior superior temporal arcade spikes,” the RAG model was able to retrieve relevant information from the external knowledge database and make accurate responses. The best fine-tune model accurately recognized 43% (13/30) of ophthalmic abbreviations. RAG strategies improved this recognition rate, ranging from 60% (18/30) to 83% (25/30) for different models.</p>
            <table-wrap position="float" id="table1">
              <label>Table 1</label>
              <caption>
                <p>Subgroup analysis of the performance of EyeGPT by subspecialty.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="200"/>
                <col width="220"/>
                <col width="220"/>
                <col width="200"/>
                <col width="160"/>
                <thead>
                  <tr valign="top">
                    <td>EyeGPT model</td>
                    <td>Common diseases, mean (SD)<sup>a</sup></td>
                    <td>Specialty diseases, mean (SD)<sup>a</sup></td>
                    <td>Rare diseases, mean (SD)<sup>a</sup></td>
                    <td><italic>P</italic> value</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Best fine-tune<sup>b</sup></td>
                    <td>15.79 (4.15)</td>
                    <td>13.11 (5.40)</td>
                    <td>10.33 (5.31)</td>
                    <td>&lt;.001</td>
                  </tr>
                  <tr valign="top">
                    <td>Role-play+database<sup>c</sup></td>
                    <td>15.28 (5.17)</td>
                    <td>14.18 (5.11)</td>
                    <td>12.18 (4.88)</td>
                    <td>.01</td>
                  </tr>
                  <tr valign="top">
                    <td>Best fine-tune+database<sup>d</sup></td>
                    <td>15.45 (4.57)</td>
                    <td>14.29 (4.89)</td>
                    <td>12.17 (4.58)</td>
                    <td>.01</td>
                  </tr>
                  <tr valign="top">
                    <td>Role-play+book<sup>e</sup></td>
                    <td>15.70 (3.94)</td>
                    <td>12.89 (5.32)</td>
                    <td>14.66 (4.53)</td>
                    <td>.02</td>
                  </tr>
                  <tr valign="top">
                    <td>Best fine-tune+book<sup>f</sup></td>
                    <td>17.24 (3.02)</td>
                    <td>14.08 (5.49)</td>
                    <td>14.23 (4.42)</td>
                    <td>.003</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table1fn1">
                  <p><sup>a</sup>Overall response score (the sum of 4 rating dimensions, with a maximum score of 20 representing the best performance).</p>
                </fn>
                <fn id="table1fn2">
                  <p><sup>b</sup>The fine-tuned model with 10,000 iterations.</p>
                </fn>
                <fn id="table1fn3">
                  <p><sup>c</sup>Role-play plus manual database retrieval.</p>
                </fn>
                <fn id="table1fn4">
                  <p><sup>d</sup>The best fine-tuned model plus manual database retrieval.</p>
                </fn>
                <fn id="table1fn5">
                  <p><sup>e</sup>Role-play plus book retrieval.</p>
                </fn>
                <fn id="table1fn6">
                  <p><sup>f</sup>The best fine-tuned model plus book retrieval.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Different Role-Play Characters</title>
            <p>When comparing the influence of the questioner’s assumed identity—patient versus medical student—on model performance, responses to patients consistently scored higher than those to medical students (<xref ref-type="table" rid="table2">Table 2</xref>). This difference reached statistical significance in the best fine-tune and role-play+database models. However, no significant differences were observed with the best fine-tune+database, role-play+book, and best fine-tune+book models, suggesting that these adjusted models can answer both general patient questions and more specialized queries from medical students.</p>
            <table-wrap position="float" id="table2">
              <label>Table 2</label>
              <caption>
                <p>Subgroup analysis of the performance of EyeGPT by role-play character.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="250"/>
                <col width="250"/>
                <col width="250"/>
                <col width="250"/>
                <thead>
                  <tr valign="top">
                    <td>EyeGPT model</td>
                    <td>Patients, mean (SD)<sup>a</sup></td>
                    <td>Medical students, mean (SD)<sup>a</sup></td>
                    <td><italic>P</italic> value</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Best fine-tune<sup>b</sup></td>
                    <td>13.45 (5.79)</td>
                    <td>10.99 (6.32)</td>
                    <td>.03</td>
                  </tr>
                  <tr valign="top">
                    <td>Role-play+database<sup>c</sup></td>
                    <td>14.67 (5.07)</td>
                    <td>11.62 (6.55)</td>
                    <td>.03</td>
                  </tr>
                  <tr valign="top">
                    <td>Best fine-tune+database<sup>d</sup></td>
                    <td>14.52 (4.99)</td>
                    <td>12.84 (5.34)</td>
                    <td>.08</td>
                  </tr>
                  <tr valign="top">
                    <td>Role-play+book<sup>e</sup></td>
                    <td>14.85 (4.78)</td>
                    <td>12.65 (5.85)</td>
                    <td>.06</td>
                  </tr>
                  <tr valign="top">
                    <td>Best fine-tune+book<sup>f</sup></td>
                    <td>14.44 (6.13)</td>
                    <td>13.38 (5.83)</td>
                    <td>.07</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table2fn1">
                  <p><sup>a</sup>Overall response score (the sum of 4 rating dimensions, with a maximum score of 20 representing the best performance).</p>
                </fn>
                <fn id="table2fn2">
                  <p><sup>b</sup>The fine-tuned model with 10,000 iterations.</p>
                </fn>
                <fn id="table2fn3">
                  <p><sup>c</sup>Role-play plus manual database retrieval.</p>
                </fn>
                <fn id="table2fn4">
                  <p><sup>d</sup>The best fine-tuned model plus manual database retrieval.</p>
                </fn>
                <fn id="table2fn5">
                  <p><sup>e</sup>Role-play plus book retrieval.</p>
                </fn>
                <fn id="table2fn6">
                  <p><sup>f</sup>The best fine-tuned model plus book retrieval.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Different Domains</title>
            <p>In the subgroup analysis of EyeGPT’s performance across different domains, there were no statistically significant differences in the scores of disease description, risk factors, diagnosis, treatment and prevention, and prognosis across all models (<xref ref-type="table" rid="table3">Table 3</xref>).</p>
            <table-wrap position="float" id="table3">
              <label>Table 3</label>
              <caption>
                <p>Subgroup analysis of the performance of EyeGPT by domain.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="170"/>
                <col width="150"/>
                <col width="140"/>
                <col width="140"/>
                <col width="170"/>
                <col width="140"/>
                <col width="90"/>
                <thead>
                  <tr valign="top">
                    <td>EyeGPT model</td>
                    <td>Disease description, mean (SD)<sup>a</sup></td>
                    <td>Risk factors, mean (SD)<sup>a</sup></td>
                    <td>Diagnosis, mean (SD)<sup>a</sup></td>
                    <td>Treatment and prevention, mean (SD)<sup>a</sup></td>
                    <td>Prognosis, mean (SD)<sup>a</sup></td>
                    <td><italic>P</italic> value</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Best fine-tune<sup>b</sup></td>
                    <td>12.92 (6.51)</td>
                    <td>12.67 (5.38)</td>
                    <td>11.81 (6.45)</td>
                    <td>13.21 (5.12)</td>
                    <td>9.90 (6.60)</td>
                    <td>.35</td>
                  </tr>
                  <tr valign="top">
                    <td>Role-play+database<sup>c</sup></td>
                    <td>12.98 (6.57)</td>
                    <td>12.73 (6.00)</td>
                    <td>13.73 (5.38)</td>
                    <td>13.53 (4.55)</td>
                    <td>11.48 (6.26)</td>
                    <td>.49</td>
                  </tr>
                  <tr valign="top">
                    <td>Best fine-tune+database<sup>d</sup></td>
                    <td>15.14 (4.76)</td>
                    <td>12.08 (5.64)</td>
                    <td>14.70 (2.98)</td>
                    <td>12.78 (4.82)</td>
                    <td>11.17 (6.10)</td>
                    <td>.06</td>
                  </tr>
                  <tr valign="top">
                    <td>Role-play+book<sup>e</sup></td>
                    <td>12.19 (6.15)</td>
                    <td>13.78 (5.61)</td>
                    <td>13.91 (5.92)</td>
                    <td>14.60 (3.31)</td>
                    <td>13.00 (5.18)</td>
                    <td>.80</td>
                  </tr>
                  <tr valign="top">
                    <td>Best fine-tune+book<sup>f</sup></td>
                    <td>11.70 (7.46)</td>
                    <td>13.33 (6.53)</td>
                    <td>15.15 (4.64)</td>
                    <td>13.64 (5.79)</td>
                    <td>12.17 (6.14)</td>
                    <td>.36</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table3fn1">
                  <p><sup>a</sup>Overall response score (the sum of 4 rating dimensions, with a maximum score of 20 representing the best performance).</p>
                </fn>
                <fn id="table3fn2">
                  <p><sup>b</sup>The fine-tuned model with 10,000 iterations.</p>
                </fn>
                <fn id="table3fn3">
                  <p><sup>c</sup>Role-play plus manual database retrieval.</p>
                </fn>
                <fn id="table3fn4">
                  <p><sup>d</sup>The best fine-tuned model plus manual database retrieval.</p>
                </fn>
                <fn id="table3fn5">
                  <p><sup>e</sup>Role-play plus book retrieval.</p>
                </fn>
                <fn id="table3fn6">
                  <p><sup>f</sup>The best fine-tuned model plus book retrieval.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Performance Comparison: AI Model Versus Human Ophthalmologists</title>
        <p>In the human-machine best-ranked comparison, EyeGPT showed competitive capabilities, particularly in understandability and empathy. With the assistance of EyeGPT, human ophthalmologists’ performance was notably improved. <xref rid="figure4" ref-type="fig">Figure 4</xref> summarizes the frequencies of answers generated by EyeGPT, unassisted ophthalmologists, or EyeGPT-assisted ophthalmologists, ranked as the best among the 3 candidate answers across 4 dimensions. Regarding understandability and empathy, the EyeGPT answers ranked best for 23 (19.2%) and 41 (34.2%) of the 120 questions, respectively, which were higher than those of ophthalmologists, which ranked best for 12 (10%) and 8 (6.7%) of the 120 questions. The answers provided by EyeGPT-assisted ophthalmologists were most frequently ranked as the best, at 85 (85/120, 70.8%) and 71 (71/120, 59.2%) for understandability and empathy, respectively; however, the accuracy and trustworthiness of EyeGPT answers were slightly lower than those by the ophthalmologists (accuracy: 12/120, 10% vs 14/120, 11.7%; trustworthiness: 12/120, 10% vs 15/120, 12.5%), highlighting areas for improvement. With the assistance of EyeGPT, the answers provided by the ophthalmologists excelled, ranking highest in accuracy and trustworthiness in 94 (78.3%) and 93 (77.5%) of the 120 questions, respectively. For illustrative examples of the best-ranked comparison, see <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Percentage of EyeGPT answers and ophthalmologists’ answers ranked as the best. EyeGPT(best): best fine-tune+book model.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e60063_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Error Analysis: Areas for Improvement</title>
        <p>The results of the error analysis are shown in <xref ref-type="supplementary-material" rid="app12">Multimedia Appendix 12</xref>. Rater 1 identified 5 (5/120, 4.2%) QA pairs as containing unrelated information, 35 (35/120, 29.2%) as containing apparent factual errors, 23 (23/120, 19.2%) as having incomplete information, and 6 (6/120, 5%) exhibiting faulty logic. Rater 2 found 6 (6/120, 5%) QA pairs with unrelated information, 30 (30/120, 25%) with factual errors, 22 (22/120, 18.3%) with incomplete information, and 4 (4/120, 3.3%) demonstrating faulty logic. The inter-rater reliability, assessed using kappa values, was 0.905, 0.895, 0.699, and 0.792, respectively.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we integrated specialized ophthalmic knowledge into a general LLM using role-play, fine-tuning, and RAG methods, resulting in the development of EyeGPT for ophthalmology. In terms of accuracy, understandability, trustworthiness, and empathy, all fine-tuned models showcased remarkable improvements compared with the original model. Among them, the best fine-tune model exhibited the highest efficacy. Among the RAG strategies, the best fine-tune+book model emerged as the most capable. Subgroup analysis revealed that EyeGPT performed well in the category of common diseases and showed consistent performance across different users and domains. EyeGPT demonstrated competitive capabilities in understandability and empathy when compared with a human ophthalmologist. With the assistance of EyeGPT, the performance of the ophthalmologists was notably enhanced.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>LLMs in health care raise concerns about inaccurate recommendations and fabricated information (hallucinations), which could lead to severe consequences. Previous studies have assessed the QA capabilities of existing LLMs in ophthalmology [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>], highlighting the significance of augmenting LLMs with ophthalmic expertise. Our study achieved this by using 3 optimization methods: role-play, fine-tuning, and RAG. Role-playing helped position EyeGPT as an ophthalmologist, resulting in more professional responses, evidenced by the significantly increased accuracy, understandability, and trustworthiness. By setting the input role as the patient or student, the LLM’s response tended to be more compassionate and preaching, as reflected in the higher empathy scores than those with the original model. “To Cure Sometimes, To Relieve Often, To Comfort Always” is a well-known saying in medicine reminding us that providing care involves not only treating ailments but also offering relief and comfort to patients. Similarly, AI models should also embody empathy when assisting users, underscoring the importance of role-playing in developing medical AI assistants. Fine-tuning with publicly available real-world patient-doctor interactions further enhanced EyeGPT’s knowledge and performance. In addition, we observed that the reduction in evaluation loss with the validation set was consistent with the improvement in the model’s performance as evaluated by ophthalmologists. RAG is another way to make the LLM knowledgeable, particularly to reduce hallucinations. In previous studies, Zakka et al [<xref ref-type="bibr" rid="ref10">10</xref>] developed Almanac, an LLM framework augmented with retrieval capabilities from curated medical resources for medical guidelines and treatment recommendations. 
In ophthalmology, Singer et al [<xref ref-type="bibr" rid="ref41">41</xref>] used verified ophthalmology textbooks as source material, providing citations to address the trustworthiness and accuracy gaps in LLM responses to Ophthalmic Knowledge Assessment Program style queries. In this study, hallucination mitigation was also observed in the model enhanced by the manual database or books, reducing it by at least 3.3% compared with the best fine-tune model. Among them, the best fine-tune+book model demonstrated the highest proportion of mitigating hallucinations and outperformed best fine-tune+database in all 4 aspects, which could potentially be attributed to the fact that the books surpassed the self-manual database regarding content richness and reference value.</p>
        <p>Interestingly, we found no significant difference in performance between RAG and fine-tuned models. Fine-tuning is a popular approach but has limitations. One limitation is its dependence on specific formats of medical dialogue data, which are scarce and require validation and curation by medical professionals [<xref ref-type="bibr" rid="ref2">2</xref>]. RAG overcomes these issues by directly leveraging authoritative external resources like textbooks, medical literature, or professional websites [<xref ref-type="bibr" rid="ref42">42</xref>]. However, it is important to note that these optimization methods are not mutually exclusive. Our results demonstrated the combined effectiveness of fine-tuning and RAG, with the best-performing EyeGPT model obtained through integration. Furthermore, the data used for fine-tuning are publicly available and reliable, and the enhanced ophthalmic books are also openly accessible, rendering these strategies valuable references for future specific LLMs.</p>
        <p>The health care environment is complex; therefore, it is essential to assess the performance of health care AI models in different scenarios [<xref ref-type="bibr" rid="ref28">28</xref>]. Current research has primarily focused on evaluation for general questions [<xref ref-type="bibr" rid="ref15">15</xref>], with limited studies on specific and rare diseases. Our study validated EyeGPT by analyzing its performance across various disease categories, demonstrating strong performance in common diseases but indicating room for improvement in special and rare diseases. Future improvements can be achieved by using high-quality data sets, specialized external knowledge resources, and exploring low-shot or few-shot learning. Additionally, we found that solely fine-tuned and RAG models were less informative for specialized medical student inquiries than simpler patient inquiries. The best-performing EyeGPT performed equally well for patient and student inquiries, suggesting that combining fine-tuning and RAG enhances LLM’s expertise in meeting the needs of both groups. Importantly, our evaluation set covers a wide range of question categories, from common diseases to rare diseases, and user roles encompassing patients and medical students, including disease descriptions, examinations, treatments, and more. By establishing multiple evaluation dimensions, including accuracy, understandability, trustworthiness, empathy, and hallucination, we aimed to provide a comprehensive reference framework for future ophthalmic specialized models.</p>
        <p>Despite a growing global ophthalmologist workforce, limited-resource countries face a severe shortage of specialists [<xref ref-type="bibr" rid="ref43">43</xref>]. EyeGPT has the potential to address this gap. Although its accuracy and trustworthiness are lower than those of human ophthalmologists, our findings show competitive capabilities in terms of understandability and empathy. This finding aligns with another study demonstrating the potential advantages of LLMs at enhancing efficiency and empathy in outpatient environments [<xref ref-type="bibr" rid="ref44">44</xref>]. We attribute this to EyeGPT’s ability to patiently process large amounts of information and initiate and conclude conversations with consistent courtesy, unaffected by fatigue, emotions, or other factors. Although this may not be genuine in the human sense in high-demand scenarios, EyeGPT received higher empathy scores than human doctors who may, at times, provide brief responses or use complex medical jargon due to their level of medical knowledge, potentially leading to issues with poor understandability and empathy. However, the LLMs’ simplified expressions may overlook certain nuanced yet crucial medical information, leading to decreased accuracy and trustworthiness. The error analysis revealed that the main gap lies in factual inaccuracies and incomplete responses, highlighting the need to integrate more ophthalmic knowledge into the model and combine it with the professional expertise and experience of human doctors for comprehensive decision-making. Although LLMs cannot replace human professionals, they could serve as an auxiliary tool to enhance physicians' performance. In our ideal scenario, EyeGPT acts as a continuous, personalized assistant, providing guidance and clarification to patients throughout their care journey, without relying on physical queues or multiple face-to-face interactions with health care personnel. 
Additionally, EyeGPT can serve as an educational tool for medical students seeking immediate clarification on complex subjects. For example, EyeGPT may help primary doctors improve their decision-making ability and reduce diagnosis time.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study has several limitations. First, the current version of the model focuses on augmenting ophthalmic knowledge at the textual level. Future iterations should prioritize enhancing the model’s image interpretation capabilities [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref45">45</xref>], crucial for ophthalmology given its heavy reliance on multimodal imaging. Second, assessing the appropriateness of medical advice may be subjective and biased by grader opinion. Further improvements could be achieved in the future, for example, by incorporating a broader spectrum of ophthalmic data and real-world feedback from users including medical students and patients. Last, a more secure application at this stage is using LLMs to assist physicians in their face-to-face consultations. This pilot study has initially validated its potential, and forthcoming research should aim to disseminate the findings more widely among the population.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In conclusion, through role-playing, fine-tuning, and RAG, EyeGPT can potentially improve accuracy and efficiency in patient consultation and medical education. It may also be expected to increase access to high-quality medical consultations, especially for patients in underprivileged regions. We hope our study can make a good contribution to the current literature on ophthalmic AI assistants to provide an effective tool for enhancing health care.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Public datasets used in fine-tuning EyeGPT.</p>
        <media xlink:href="jmir_v26i1e60063_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 93 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>The specific list of keywords used in the public datasets filtering process.</p>
        <media xlink:href="jmir_v26i1e60063_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 82 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>The specific list of textbooks used in knowledge enhancement.</p>
        <media xlink:href="jmir_v26i1e60063_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 119 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Sample of our manual database.</p>
        <media xlink:href="jmir_v26i1e60063_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 427 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Specific diseases covered by the question lists.</p>
        <media xlink:href="jmir_v26i1e60063_app5.pdf" xlink:title="PDF File  (Adobe PDF File), 97 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <media xlink:href="jmir_v26i1e60063_app6.pdf" xlink:title="PDF File  (Adobe PDF File), 109 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Tensorboard training logs of Finetune 3.</p>
        <media xlink:href="jmir_v26i1e60063_app7.pdf" xlink:title="PDF File  (Adobe PDF File), 118 KB"/>
      </supplementary-material>
      <supplementary-material id="app8">
        <label>Multimedia Appendix 8</label>
        <p>Statistical analysis of independent evaluations of 120 questions on the test set along four dimensions.</p>
        <media xlink:href="jmir_v26i1e60063_app8.pdf" xlink:title="PDF File  (Adobe PDF File), 129 KB"/>
      </supplementary-material>
      <supplementary-material id="app9">
        <label>Multimedia Appendix 9</label>
        <p>Inter-rater reliability analysis of 120 questions on the test set along four dimensions in independent evaluation.</p>
        <media xlink:href="jmir_v26i1e60063_app9.pdf" xlink:title="PDF File  (Adobe PDF File), 102 KB"/>
      </supplementary-material>
      <supplementary-material id="app10">
        <label>Multimedia Appendix 10</label>
        <p>Examples of generated answers with different grading in independent evaluation.</p>
        <media xlink:href="jmir_v26i1e60063_app10.pdf" xlink:title="PDF File  (Adobe PDF File), 152 KB"/>
      </supplementary-material>
      <supplementary-material id="app11">
        <label>Multimedia Appendix 11</label>
        <p>Examples in best-ranked comparison.</p>
        <media xlink:href="jmir_v26i1e60063_app11.pdf" xlink:title="PDF File  (Adobe PDF File), 131 KB"/>
      </supplementary-material>
      <supplementary-material id="app12">
        <label>Multimedia Appendix 12</label>
        <p>Error analysis of the EyeGPT.</p>
        <media xlink:href="jmir_v26i1e60063_app12.pdf" xlink:title="PDF File  (Adobe PDF File), 91 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">FAISS</term>
          <def>
            <p>Facebook AI Similarity Search</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LoRA</term>
          <def>
            <p>low-rank adaptation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">QA</term>
          <def>
            <p>question-answering</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RAG</term>
          <def>
            <p>retrieval-augmented generation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">USMLE</term>
          <def>
            <p>United States Medical Licensing Examination</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The study was supported by the Start-up Fund for RAPs under the Strategic Hiring Scheme (P0048623) from Hong Kong Special Administrative Region (HKSAR), the Global STEM Professorship Scheme (P0046113), and Henry G Leong Endowed Professorship in Elderly Vision Health. The sponsors or funding organizations had no role in the design or conduct of this research.</p>
      <p>We thank the InnoHK HKSAR Government for providing valuable support.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated during this study are available in the figshare repository at [<xref ref-type="bibr" rid="ref46">46</xref>]. EyeGPT is available for use via [<xref ref-type="bibr" rid="ref47">47</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>DS and XC contributed to conceptualization. DS and WZ developed the methodology. DS, XC, ZZ, PX, YW, MX, LG, and YL were responsible for data curation, formal analysis of the data, and validation. XC, ZZ, and PX wrote the original draft of the manuscript. All authors commented on the manuscript and approved the current version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burton</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ramke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Marques</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Bourne</surname>
              <given-names>RRA</given-names>
            </name>
            <name name-style="western">
              <surname>Congdon</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ah Tong</surname>
              <given-names>BAM</given-names>
            </name>
            <name name-style="western">
              <surname>Arunga</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bachani</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bascaran</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bastawrous</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Blanchet</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Braithwaite</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Buchan</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Cairns</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cama</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chagunda</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chuluunkhuu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Crofts-Lawrence</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Denniston</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Ehrlich</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Emerson</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Frick</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Furtado</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Gichangi</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Gichuhi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Gurung</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Habtamu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jonas</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Keay</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Khanna</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Khaw</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Kuper</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kyari</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lansingh</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>Mactaggart</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Mafwiri</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Mathenge</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>McCormick</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Morjaria</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mowatt</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Muirhead</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Murthy</surname>
              <given-names>GVS</given-names>
            </name>
            <name name-style="western">
              <surname>Mwangi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Peto</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Qureshi</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Salomão</surname>
              <given-names>Solange R</given-names>
            </name>
            <name name-style="western">
              <surname>Sarah</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Shilio</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Solomon</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Swenor</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Webson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>West</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>TY</given-names>
            </name>
            <name name-style="western">
              <surname>Wormald</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yasmin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yusufu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Resnikoff</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ravilla</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Faal</surname>
              <given-names>HB</given-names>
            </name>
          </person-group>
          <article-title>The Lancet Global Health Commission on Global Eye Health: vision beyond 2020</article-title>
          <source>Lancet Glob Health</source>
          <year>2021</year>
          <month>04</month>
          <volume>9</volume>
          <issue>4</issue>
          <fpage>e489</fpage>
          <lpage>e551</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2214-109X(20)30488-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2214-109X(20)30488-5</pub-id>
          <pub-id pub-id-type="medline">33607016</pub-id>
          <pub-id pub-id-type="pii">S2214-109X(20)30488-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7966694</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Betzler</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Ning</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Kawasaki</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>van Wijngaarden</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grzybowski</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ran Ran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DSW</given-names>
            </name>
            <name name-style="western">
              <surname>Teo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ruamviboonsuk</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sivaprasad</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhary</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Tadayoni</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>YX</given-names>
            </name>
            <name name-style="western">
              <surname>Tham</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>TY</given-names>
            </name>
          </person-group>
          <article-title>Large language models and their impact in ophthalmology</article-title>
          <source>Lancet Digit Health</source>
          <year>2023</year>
          <month>12</month>
          <volume>5</volume>
          <issue>12</issue>
          <fpage>e917</fpage>
          <lpage>e924</lpage>
          <pub-id pub-id-type="doi">10.1016/s2589-7500(23)00201-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Swearingen</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Diabetic eye screening: knowledge and perspectives from providers and patients</article-title>
          <source>Curr Diab Rep</source>
          <year>2017</year>
          <month>08</month>
          <day>31</day>
          <volume>17</volume>
          <issue>10</issue>
          <fpage>94</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28856510"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11892-017-0911-2</pub-id>
          <pub-id pub-id-type="medline">28856510</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11892-017-0911-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6389265</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thirunavukarasu</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DSJ</given-names>
            </name>
            <name name-style="western">
              <surname>Elangovan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gutierrez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DSW</given-names>
            </name>
          </person-group>
          <article-title>Large language models in medicine</article-title>
          <source>Nat Med</source>
          <year>2023</year>
          <month>08</month>
          <volume>29</volume>
          <issue>8</issue>
          <fpage>1930</fpage>
          <lpage>1940</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id>
          <pub-id pub-id-type="medline">37460753</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-023-02448-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Antaki</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Touma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Milad</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>El-Khoury</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Duval</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the performance of ChatGPT in ophthalmology: an analysis of its successes and shortcomings</article-title>
          <source>Ophthalmol Sci</source>
          <year>2023</year>
          <month>12</month>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>100324</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2666-9145(23)00056-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.xops.2023.100324</pub-id>
          <pub-id pub-id-type="medline">37334036</pub-id>
          <pub-id pub-id-type="pii">S2666-9145(23)00056-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10272508</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Uncovering language disparity of ChatGPT on retinal vascular disease classification: cross-sectional study</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <month>01</month>
          <day>22</day>
          <volume>26</volume>
          <fpage>e51926</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e51926/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51926</pub-id>
          <pub-id pub-id-type="medline">38252483</pub-id>
          <pub-id pub-id-type="pii">v26i1e51926</pub-id>
          <pub-id pub-id-type="pmcid">PMC10845019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>FFA-GPT: an automated pipeline for fundus fluorescein angiography interpretation and question-answer</article-title>
          <source>NPJ Digit Med</source>
          <year>2024</year>
          <month>05</month>
          <day>03</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>111</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01101-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01101-z</pub-id>
          <pub-id pub-id-type="medline">38702471</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01101-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC11068733</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caranfa</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Bommakanti</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>PY</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of vitreoretinal disease information from an artificial intelligence chatbot</article-title>
          <source>JAMA Ophthalmol</source>
          <year>2023</year>
          <month>09</month>
          <day>01</day>
          <volume>141</volume>
          <issue>9</issue>
          <fpage>906</fpage>
          <lpage>907</lpage>
          <pub-id pub-id-type="doi">10.1001/jamaophthalmol.2023.3314</pub-id>
          <pub-id pub-id-type="medline">37535363</pub-id>
          <pub-id pub-id-type="pii">2807968</pub-id>
          <pub-id pub-id-type="pmcid">PMC10401388</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Unveiling the clinical incapabilities: a benchmarking study of GPT-4V(ision) for ophthalmic multimodal image analysis</article-title>
          <source>Br J Ophthalmol</source>
          <year>2024</year>
          <month>09</month>
          <day>20</day>
          <volume>108</volume>
          <issue>10</issue>
          <fpage>1384</fpage>
          <lpage>1389</lpage>
          <pub-id pub-id-type="doi">10.1136/bjo-2023-325054</pub-id>
          <pub-id pub-id-type="medline">38789133</pub-id>
          <pub-id pub-id-type="pii">bjo-2023-325054</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zakka</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shad</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chaurasia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dalal</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Moor</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fong</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alexander</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ashley</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hirsch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Melia</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sallam</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tullis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vogelsong</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Cunningham</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Hiesinger</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Almanac — retrieval-augmented language models for clinical medicine</article-title>
          <source>NEJM AI</source>
          <year>2024</year>
          <month>02</month>
          <day>25</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38343631"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/aioa2300068</pub-id>
          <pub-id pub-id-type="medline">38343631</pub-id>
          <pub-id pub-id-type="pmcid">PMC10857783</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Younessi</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Kurapati</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>OY</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>IU</given-names>
            </name>
          </person-group>
          <article-title>Comparison of GPT-3.5, GPT-4, and human user performance on a practice ophthalmology written examination</article-title>
          <source>Eye (Lond)</source>
          <year>2023</year>
          <month>12</month>
          <day>08</day>
          <volume>37</volume>
          <issue>17</issue>
          <fpage>3694</fpage>
          <lpage>3695</lpage>
          <pub-id pub-id-type="doi">10.1038/s41433-023-02564-2</pub-id>
          <pub-id pub-id-type="medline">37156862</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41433-023-02564-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC10686407</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mihalache</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Popovic</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Muni</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Performance of an artificial intelligence chatbot in ophthalmic knowledge assessment</article-title>
          <source>JAMA Ophthalmol</source>
          <year>2023</year>
          <month>06</month>
          <day>01</day>
          <volume>141</volume>
          <issue>6</issue>
          <fpage>589</fpage>
          <lpage>597</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37103928"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamaophthalmol.2023.1144</pub-id>
          <pub-id pub-id-type="medline">37103928</pub-id>
          <pub-id pub-id-type="pii">2804364</pub-id>
          <pub-id pub-id-type="pmcid">PMC10141269</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>LZ</given-names>
            </name>
            <name name-style="western">
              <surname>Shaheen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fukui</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yi</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Yannuzzi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Alabiad</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Performance of generative large language models on ophthalmology board-style questions</article-title>
          <source>Am J Ophthalmol</source>
          <year>2023</year>
          <month>10</month>
          <volume>254</volume>
          <fpage>141</fpage>
          <lpage>149</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ajo.2023.05.024</pub-id>
          <pub-id pub-id-type="medline">37339728</pub-id>
          <pub-id pub-id-type="pii">S0002-9394(23)00230-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Decker</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Trang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ramirez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Colley</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bongiovanni</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Wick</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Large language model-based chatbot vs surgeon-generated informed consent documentation for common procedures</article-title>
          <source>JAMA Netw Open</source>
          <year>2023</year>
          <month>10</month>
          <day>02</day>
          <volume>6</volume>
          <issue>10</issue>
          <fpage>e2336997</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37812419"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.36997</pub-id>
          <pub-id pub-id-type="medline">37812419</pub-id>
          <pub-id pub-id-type="pii">2810364</pub-id>
          <pub-id pub-id-type="pmcid">PMC10562939</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pushpanathan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>ZW</given-names>
            </name>
            <name name-style="western">
              <surname>Er Yew</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>DZ</given-names>
            </name>
            <name name-style="western">
              <surname>Hui'En Lin</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Lin Goh</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jin Tan</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Chang Koh</surname>
              <given-names>VT</given-names>
            </name>
            <name name-style="western">
              <surname>Tham</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Popular large language model chatbots' accuracy, comprehensiveness, and self-awareness in answering ocular symptom queries</article-title>
          <source>iScience</source>
          <year>2023</year>
          <month>11</month>
          <day>17</day>
          <volume>26</volume>
          <issue>11</issue>
          <fpage>108163</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-0042(23)02240-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.isci.2023.108163</pub-id>
          <pub-id pub-id-type="medline">37915603</pub-id>
          <pub-id pub-id-type="pii">S2589-0042(23)02240-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC10616302</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Owens</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Galvez</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Gologorskaya</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Pletcher</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Development of a liver disease-specific large language model chat interface using retrieval-augmented generation</article-title>
          <source>Hepatology</source>
          <year>2024</year>
          <month>11</month>
          <day>01</day>
          <volume>80</volume>
          <issue>5</issue>
          <fpage>1158</fpage>
          <lpage>1168</lpage>
          <pub-id pub-id-type="doi">10.1097/HEP.0000000000000834</pub-id>
          <pub-id pub-id-type="medline">38451962</pub-id>
          <pub-id pub-id-type="pii">01515467-990000000-00791</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Civettini</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zappaterra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Granelli</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Rindone</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Aroldi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bonfanti</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Colombo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Fedele</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grillo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Parma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perfetti</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Terruzzi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gambacorti-Passerini</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ramazzotti</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cavalca</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the performance of large language models in haematopoietic stem cell transplantation decision-making</article-title>
          <source>Br J Haematol</source>
          <year>2024</year>
          <month>04</month>
          <day>09</day>
          <volume>204</volume>
          <issue>4</issue>
          <fpage>1523</fpage>
          <lpage>1528</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hdl.handle.net/10281/453078"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/bjh.19200</pub-id>
          <pub-id pub-id-type="medline">38070128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sandmann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Riepenhausen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Plagwitz</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Varghese</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Systematic analysis of ChatGPT, Google search and Llama 2 for clinical decision support tasks</article-title>
          <source>Nat Commun</source>
          <year>2024</year>
          <month>03</month>
          <day>06</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>2050</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-024-46411-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-024-46411-8</pub-id>
          <pub-id pub-id-type="medline">38448475</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-024-46411-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10917796</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Touvron</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Albert</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Almahairi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Babaei</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Llama 2: open foundation and fine-tuned chat models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on July 19, 2023</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2307.09288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taori</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gulrajani</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dubois</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>tatsu-lab / stanford_alpaca</article-title>
          <source>GitHub</source>
          <access-date>2024-11-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/tatsu-lab/stanford_alpaca">https://github.com/tatsu-lab/stanford_alpaca</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Xin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Better zero-shot reasoning with role-play prompting</article-title>
          <year>2024</year>
          <conf-name>Annual Conference of the North American Chapter of the Association for Computational Linguistics</conf-name>
          <conf-date>June 16–21, 2024</conf-date>
          <conf-loc>Mexico City, Mexico</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2024.naacl-long.228</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Papaioannou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grundmann</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Oberhauser</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Löser</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Truhn</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bressem</surname>
              <given-names>KK</given-names>
            </name>
          </person-group>
          <article-title>MedAlpaca — an open-source collection of medical conversational AI models and training data</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on October 4, 2023</comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>ChatDoctor: a medical chat model fine-tuned on a large language model meta-AI (LLaMA) using medical domain knowledge</article-title>
          <source>Cureus</source>
          <year>2023</year>
          <month>06</month>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>e40895</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37492832"/>
          </comment>
          <pub-id pub-id-type="doi">10.7759/cureus.40895</pub-id>
          <pub-id pub-id-type="medline">37492832</pub-id>
          <pub-id pub-id-type="pmcid">PMC10364849</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Umapathi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sankarasubbu</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>MedMCQA: a large-scale multi-subject multi-choice dataset for medical domain question answering</article-title>
          <source>Proceedings of Machine Learning Research</source>
          <year>2022</year>
          <volume>174</volume>
          <fpage>248</fpage>
          <lpage>260</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.mlr.press/v174/pal22a.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Oufattole</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>What disease does this patient have? A large-scale open domain question answering dataset from medical exams</article-title>
          <source>Applied Sciences</source>
          <year>2021</year>
          <month>07</month>
          <day>12</day>
          <volume>11</volume>
          <issue>14</issue>
          <fpage>6421</fpage>
          <pub-id pub-id-type="doi">10.3390/app11146421</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ouyang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wainwright</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mishkin</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Slama</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ray</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schulman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hilton</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kelton</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Simens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Askell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Welinder</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Christiano</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Leike</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lowe</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Training language models to follow instructions with human feedback</article-title>
          <source>Advances in Neural Information Processing Systems</source>
          <year>2022</year>
          <volume>35</volume>
          <fpage>27730</fpage>
          <lpage>27744</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper_files/paper/2022/file/b1efde53be364a73914f58805a001731-Paper-Conference.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wallis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Allen-Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>LoRA: Low-Rank Adaptation of Large Language Models</article-title>
          <year>2022</year>
          <conf-name>The Tenth International Conference on Learning Representations</conf-name>
          <conf-date>April 25-29, 2022</conf-date>
          <conf-loc>Virtual meeting</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Evaluating large language models in medical applications: a survey</article-title>
          <source>arXiv</source>
          <year>2024</year>
          <comment>Preprint posted online on May 13, 2024</comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="book">
          <source>Basic and Clinical Science Course: 2014-2015</source>
          <year>2014</year>
          <publisher-loc>San Francisco, CA</publisher-loc>
          <publisher-name>American Academy of Ophthalmology</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Denniston</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <source>Oxford handbook of ophthalmology, third edition</source>
          <year>2014</year>
          <publisher-loc>Oxford, United Kingdom</publisher-loc>
          <publisher-name>Oxford University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sadda</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schachat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wilkinson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wiedemann</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <source>Retina</source>
          <year>2013</year>
          <publisher-loc>Philadelphia, PA</publisher-loc>
          <publisher-name>Saunders Elsevier</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reimers</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks</article-title>
          <year>2019</year>
          <conf-name>Conference on Empirical Methods in Natural Language Processing and 9th International Joint Conference on Natural Language Processing</conf-name>
          <conf-date>November 3–7, 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1410</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <source>RecursiveCharacterTextSplitter</source>
          <access-date>2024-12-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html">https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Douze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guzhva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Szilvasy</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mazaré</surname>
              <given-names>P-E</given-names>
            </name>
            <name name-style="western">
              <surname>Lomeli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hosseini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jégou</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The faiss library</article-title>
          <source>arXiv</source>
          <year>2024</year>
          <comment>Preprint posted online on Sep 06, 2024</comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abbasian</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Khatibi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Azimi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Oniani</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shakeri Hossein Abad</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Thieme</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sriram</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gevaert</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rahmani</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Foundation metrics for evaluating effectiveness of healthcare conversations powered by generative AI</article-title>
          <source>NPJ Digit Med</source>
          <year>2024</year>
          <month>03</month>
          <day>29</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>82</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01074-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01074-z</pub-id>
          <pub-id pub-id-type="medline">38553625</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01074-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC10980701</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>DSW</given-names>
            </name>
            <name name-style="western">
              <surname>Makmur</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>AJL</given-names>
            </name>
            <name name-style="western">
              <surname>Sia</surname>
              <given-names>DSY</given-names>
            </name>
            <name name-style="western">
              <surname>Eide</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>HY</given-names>
            </name>
            <name name-style="western">
              <surname>Jagmohan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Khoo</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>YM</given-names>
            </name>
            <name name-style="western">
              <surname>Thian</surname>
              <given-names>YL</given-names>
            </name>
            <name name-style="western">
              <surname>Baskar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Teo</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Algazwi</surname>
              <given-names>DAR</given-names>
            </name>
            <name name-style="western">
              <surname>Yap</surname>
              <given-names>QV</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ooi</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshioka</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Quek</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Hallinan</surname>
              <given-names>JTPD</given-names>
            </name>
          </person-group>
          <article-title>Improved productivity using deep learning-assisted reporting for lumbar spine MRI</article-title>
          <source>Radiology</source>
          <year>2022</year>
          <month>10</month>
          <volume>305</volume>
          <issue>1</issue>
          <fpage>160</fpage>
          <lpage>166</lpage>
          <pub-id pub-id-type="doi">10.1148/radiol.220076</pub-id>
          <pub-id pub-id-type="medline">35699577</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Miao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schoepf</surname>
              <given-names>UJ</given-names>
            </name>
            <name name-style="western">
              <surname>Varga-Szemes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Garrison Moore</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>LJ</given-names>
            </name>
            <collab>China Aneurysm AI Project Group</collab>
          </person-group>
          <article-title>A deep-learning model for intracranial aneurysm detection on CT angiography images in China: a stepwise, multicentre, early-stage clinical validation study</article-title>
          <source>Lancet Digit Health</source>
          <year>2024</year>
          <month>04</month>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e261</fpage>
          <lpage>e271</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(23)00268-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(23)00268-6</pub-id>
          <pub-id pub-id-type="medline">38519154</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(23)00268-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Azizi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Driess</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schaekermann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Amin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tanno</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ktena</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Palepu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mustafa</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chowdhery</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kornblith</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fleet</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mansfield</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Prakash</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Virmani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Semturs</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mahdavi</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Dominowska</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Arcas</surname>
              <given-names>BAY</given-names>
            </name>
            <name name-style="western">
              <surname>Barral</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Webster</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Matias</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Singhal</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Florence</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikesalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Natarajan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Towards generalist biomedical AI</article-title>
          <source>NEJM AI</source>
          <year>2024</year>
          <month>02</month>
          <day>22</day>
          <volume>1</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1056/aioa2300138</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>ChatFFA: An ophthalmic chat system for unified vision-language understanding and question answering for fundus fluorescein angiography</article-title>
          <source>iScience</source>
          <year>2024</year>
          <month>07</month>
          <day>19</day>
          <volume>27</volume>
          <issue>7</issue>
          <fpage>110021</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-0042(24)01246-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.isci.2024.110021</pub-id>
          <pub-id pub-id-type="medline">39055931</pub-id>
          <pub-id pub-id-type="pii">S2589-0042(24)01246-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC11269310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mandrekar</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>Measures of interrater agreement</article-title>
          <source>J Thorac Oncol</source>
          <year>2011</year>
          <month>01</month>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1097/jto.0b013e318200f983</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Teng</surname>
              <given-names>CC</given-names>
            </name>
          </person-group>
          <article-title>Development and evaluation of Aeyeconsult: a novel ophthalmology chatbot leveraging verified textbook knowledge and GPT-4</article-title>
          <source>J Surg Educ</source>
          <year>2024</year>
          <month>03</month>
          <volume>81</volume>
          <issue>3</issue>
          <fpage>438</fpage>
          <lpage>443</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jsurg.2023.11.019</pub-id>
          <pub-id pub-id-type="medline">38135548</pub-id>
          <pub-id pub-id-type="pii">S1931-7204(23)00432-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Retrieval-augmented generation for large language models: A survey</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on Mar 27, 2024</comment>
          <pub-id pub-id-type="doi">10.2139/SSRN.4895062</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Resnikoff</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lansingh</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>Washburn</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Felch</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gauthier</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Eckert</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Parke</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wiedemann</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Estimated number of ophthalmologists worldwide (International Council of Ophthalmology update): will we meet the needs?</article-title>
          <source>Br J Ophthalmol</source>
          <year>2020</year>
          <month>04</month>
          <volume>104</volume>
          <issue>4</issue>
          <fpage>588</fpage>
          <lpage>592</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://bjo.bmj.com/lookup/pmidlookup?view=long&amp;pmid=31266774"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bjophthalmol-2019-314336</pub-id>
          <pub-id pub-id-type="medline">31266774</pub-id>
          <pub-id pub-id-type="pii">bjophthalmol-2019-314336</pub-id>
          <pub-id pub-id-type="pmcid">PMC7147181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Nie</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Long</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Outpatient reception via collaboration between nurses and a large language model: a randomized controlled trial</article-title>
          <source>Nat Med</source>
          <year>2024</year>
          <month>10</month>
          <volume>30</volume>
          <issue>10</issue>
          <fpage>2878</fpage>
          <lpage>2885</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-03148-7</pub-id>
          <pub-id pub-id-type="medline">39009780</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-03148-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ICGA-GPT: report generation and question answering for indocyanine green angiography images</article-title>
          <source>Br J Ophthalmol</source>
          <year>2024</year>
          <month>09</month>
          <day>20</day>
          <volume>108</volume>
          <issue>10</issue>
          <fpage>1450</fpage>
          <lpage>1456</lpage>
          <pub-id pub-id-type="doi">10.1136/bjo-2023-324446</pub-id>
          <pub-id pub-id-type="medline">38508675</pub-id>
          <pub-id pub-id-type="pii">bjo-2023-324446</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <article-title>EyeQA: Evaluation Dataset for Ophthalmic Assistant with Large Language Models</article-title>
          <source>Figshare</source>
          <access-date>2024-11-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://figshare.com/s/cb0525c72ef467d2d809">https://figshare.com/s/cb0525c72ef467d2d809</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <article-title>EyeGPT</article-title>
          <source>Hugging Face</source>
          <access-date>2024-11-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/spaces/spaceis42/EyeGPT">https://huggingface.co/spaces/spaceis42/EyeGPT</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
