<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v27i1e82729</article-id><article-id pub-id-type="doi">10.2196/82729</article-id><article-categories><subj-group subj-group-type="heading"><subject>Letter to the Editor</subject></subj-group></article-categories><title-group><article-title>Author's Reply: Critical Limitations in Systematic Reviews of Large Language Models in Health Care</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Python</surname><given-names>Andre</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Li</surname><given-names>HongYi</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Fu</surname><given-names>Jun-Fen</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref><xref ref-type="aff" 
rid="aff7">7</xref></contrib></contrib-group><aff id="aff1"><institution>Center for Data Science, Zhejiang University</institution><addr-line>Hangzhou</addr-line><country>China</country></aff><aff id="aff2"><institution>School of Medicine, Zhejiang University</institution><addr-line>Hangzhou</addr-line><country>China</country></aff><aff id="aff3"><institution>Centre for Human Genetics, Nuffield Department of Medicine, University of Oxford</institution><addr-line>Roosevelt Drive</addr-line><addr-line>Oxford</addr-line><country>United Kingdom</country></aff><aff id="aff4"><institution>School of Mathematical Sciences, Zhejiang University</institution><addr-line>Hangzhou</addr-line><country>China</country></aff><aff id="aff5"><institution>School of Medicine, Children&#x2019;s Hospital of Zhejiang University</institution><addr-line>Hangzhou</addr-line><country>China</country></aff><aff id="aff6"><institution>National Clinical Research Center for Child Health</institution><addr-line>Hangzhou</addr-line><country>China</country></aff><aff id="aff7"><institution>National Regional Center for Children's Health</institution><addr-line>Hangzhou</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Leung</surname><given-names>Tiffany</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Andre Python, PhD, Centre for Human Genetics, Nuffield Department of Medicine, University of Oxford, Roosevelt Drive, Oxford, OX3 7BN, United Kingdom, 44 01865 287500; <email>andre.python@well.ox.ac.uk</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>24</day><month>9</month><year>2025</year></pub-date><volume>27</volume><elocation-id>e82729</elocation-id><history><date date-type="received"><day>20</day><month>08</month><year>2025</year></date><date 
date-type="rev-recd"><day>26</day><month>08</month><year>2025</year></date><date date-type="accepted"><day>29</day><month>08</month><year>2025</year></date></history><copyright-statement>&#x00A9; Andre Python, HongYi Li, Jun-Fen Fu. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 24.9.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e82729"/><related-article related-article-type="commentary article" ext-link-type="doi" xlink:href="10.2196/81769" xlink:title="Comment on" xlink:type="simple">https://www.jmir.org/2025/1/e81769</related-article><related-article related-article-type="commentary article" ext-link-type="doi" xlink:href="10.2196/71916" xlink:title="Comment on" xlink:type="simple">https://www.jmir.org/2025/1/e71916</related-article><kwd-group><kwd>large language model</kwd><kwd>LLM</kwd><kwd>clinical</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>digital health</kwd><kwd>LLM review</kwd><kwd>review</kwd><kwd>letter</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>We thank the correspondent for engaging with our original work [<xref ref-type="bibr" rid="ref1">1</xref>] and raising constructive points in their Letter [<xref ref-type="bibr" rid="ref2">2</xref>].</p></sec><sec id="s2"><title>Citation Threshold Bias</title><p>We acknowledge that the citation criteria applied to select journals may exclude relevant studies from emerging or specialized venues. Our criteria were not only desirable but necessary to balance comprehensiveness with methodological quality considering the rapidly expanding literature. 
To mitigate the risk of omission of innovative research, we (1) screened and incorporated all relevant articles from main database platforms as well as e-prints and (2) made available an interactive online guideline offering an up-to-date guide to clinicians.</p></sec><sec id="s3"><title>Definition of &#x201C;Best Performance&#x201D;</title><p>We acknowledge the concerns associated with the performance comparison of models across heterogeneous contexts. To avoid ambiguity and misinterpretation, we stated and discussed in detail that, in our study, the term &#x201C;best performance&#x201D; is solely associated with the findings from the reviewed studies. Our analysis helps identify models successfully applied in clinical studies, without aiming at or implying comparison across domains. We direct readers to the excellent recent work by Liu et al [<xref ref-type="bibr" rid="ref3">3</xref>] for a comparison of lightweight large language models (LLMs) for medical tasks.</p></sec><sec id="s4"><title>Quality Assessment of the Included Studies</title><p>We carried out a thorough quality assessment following PRISMA guidelines [<xref ref-type="bibr" rid="ref4">4</xref>]. This might have escaped the correspondent&#x2019;s attention, as the details are provided in Multimedia Appendix 2 of our work [<xref ref-type="bibr" rid="ref1">1</xref>].</p></sec><sec id="s5"><title>Clinical Workflow</title><p>The suggested 5-stage workflow neither ignores nor intends to capture the full complexity of clinical practice. Rather, it serves as a framework to associate the reported use of LLMs with tasks and processes familiar to clinicians, in line with a previous study [<xref ref-type="bibr" rid="ref5">5</xref>]. 
Our workflow offers a practical assessment of the role and extent of LLMs applied in clinically relevant sectors of activities and tasks.</p></sec><sec id="s6"><title>Clinical Validation Gap</title><p>We acknowledge and discuss the challenges in assessing the practicality of LLM deployment in clinical applications. Complementary to benchmarking LLMs on research datasets, our review covers studies using LLMs in both research and clinical settings. While we identified key challenges of LLMs in real-world applications, a comprehensive assessment of discrepancies between research and clinical settings is clearly beyond the scope of our review.</p></sec><sec id="s7"><title>Safety and Risk Analyses</title><p>While our review discusses key concerns about the use of LLMs in clinical settings, including hallucination risks and ethical considerations, a comprehensive risk assessment is beyond its scope. Future research dedicated to tackling this key topic would require substantial efforts.</p></sec><sec id="s8"><title>Economic Evaluation</title><p>Our review assesses the associated costs of the graphics processing unit memory and its cooling requirements by process and clinical tasks. Our interactive online guideline will regularly incorporate future changes in the requirements and costs, as exemplified by the recent rise of lightweight LLMs that may offer excellent performance on consumer-grade hardware. 
However, a comprehensive cost-effectiveness or return-on-investment analysis is beyond the study scope.</p></sec><sec id="s9" sec-type="conclusions"><title>Conclusion</title><p>These observations are a timely reminder that our current understanding of the application of LLMs in clinical settings remains provisional and that we need continual reassessment of their current and future roles in health care practice.</p></sec></body><back><ack><p>We declare that no part of this submission has been generated by AI.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name><name name-style="western"><surname>Fu</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Python</surname><given-names>A</given-names> </name></person-group><article-title>Implementing large language models in health care: clinician-focused review with interactive guideline</article-title><source>J Med Internet Res</source><year>2025</year><month>07</month><day>11</day><volume>27</volume><fpage>e71916</fpage><pub-id pub-id-type="doi">10.2196/71916</pub-id><pub-id pub-id-type="medline">40644686</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weizman</surname><given-names>Z</given-names> </name></person-group><article-title>Critical limitations in systematic reviews of large language models in health care</article-title><source>J Med Internet Res</source><year>2025</year><volume>27</volume><fpage>e81769</fpage><pub-id 
pub-id-type="doi">10.2196/81769</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gu</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Application of large language models in medicine</article-title><source>Nat Rev Bioeng</source><year>2025</year><volume>3</volume><issue>6</issue><fpage>445</fpage><lpage>464</lpage><pub-id pub-id-type="doi">10.1038/s44222-025-00279-5</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Page</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Bossuyt</surname><given-names>PM</given-names> </name><etal/></person-group><article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title><source>BMJ</source><year>2021</year><month>03</month><day>29</day><volume>372</volume><fpage>n71</fpage><pub-id pub-id-type="doi">10.1136/bmj.n71</pub-id><pub-id pub-id-type="medline">33782057</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Betzler</surname><given-names>BK</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>CY</given-names> </name><etal/></person-group><article-title>Large language models and their impact in ophthalmology</article-title><source>Lancet Digit 
Health</source><year>2023</year><month>12</month><volume>5</volume><issue>12</issue><fpage>e917</fpage><lpage>e924</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(23)00201-7</pub-id><pub-id pub-id-type="medline">38000875</pub-id></nlm-citation></ref></ref-list></back></article>