<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e85726</article-id><article-id pub-id-type="doi">10.2196/85726</article-id><article-categories><subj-group subj-group-type="heading"><subject>Letter to the Editor</subject></subj-group></article-categories><title-group><article-title>Human-in-the-Loop as a Safety Guardrail: Clinical Accountability in the Large Language Model Era</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Zablah</surname><given-names>Isaac</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Molina</surname><given-names>Yolly</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Garcia-Loureiro</surname><given-names>Antonio</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref 
ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Faculty of Medical Sciences, National Autonomous University of Honduras</institution><addr-line>Calle la Salud SN</addr-line><addr-line>Tegucigalpa</addr-line><country>Honduras</country></aff><aff id="aff2"><institution>Center for Biomedical Imaging Diagnostics Research and Rehabilitation, National Autonomous University of Honduras</institution><addr-line>Tegucigalpa</addr-line><country>Honduras</country></aff><aff id="aff3"><institution>Department of Electronics and Computer Science, Universidade de Santiago de Compostela</institution><addr-line>Santiago de Compostela</addr-line><country>Spain</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Isaac Zablah, PhD, Faculty of Medical Sciences, National Autonomous University of Honduras, Calle la Salud SN, Tegucigalpa, 11101, Honduras; <email>jose.zablah@unah.edu.hn</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>all authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>18</day><month>6</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e85726</elocation-id><history><date date-type="received"><day>12</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>08</day><month>05</month><year>2026</year></date></history><copyright-statement>&#x00A9; Isaac Zablah, Yolly Molina, Antonio Garcia-Loureiro. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 18.6.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e85726"/><related-article related-article-type="commentary article" ext-link-type="doi" xlink:href="10.2196/59069" xlink:title="Comment on" xlink:type="simple">https://www.jmir.org/2025/1/e59069</related-article><kwd-group><kwd>large language models</kwd><kwd>high-performance computing</kwd><kwd>medical informatics</kwd><kwd>computational efficiency</kwd><kwd>clinical decision support</kwd><kwd>artificial intelligence</kwd><kwd>healthcare infrastructure</kwd><kwd>model optimization</kwd></kwd-group></article-meta></front><body><p>We found Zhang et al&#x2019;s thorough review of the transformative potential of large language models (LLMs) in health care to be very interesting [<xref ref-type="bibr" rid="ref1">1</xref>]. The authors provide a strong discussion of clinical applications, data integration, and ethical issues. 
However, we think that important aspects of computational performance need more attention, especially when it comes to deploying this technology in real-world health care settings where resources are limited.</p><p>Zhang et al mention that &#x201C;<italic>technological advancements</italic>&#x201D; are helping to meet the &#x201C;<italic>high hardware requirements</italic>&#x201D; of LLMs [<xref ref-type="bibr" rid="ref1">1</xref>], but the computational reality remains a major challenge. Modern general-purpose LLMs such as GPT-4 and domain-specific medical models such as Med-PaLM 2 need considerable infrastructure, as described below [<xref ref-type="bibr" rid="ref2">2</xref>]:</p><list list-type="bullet"><list-item><p><italic>Inference latency:</italic> Currently, LLMs take 2 to 10 seconds to respond to each query. This may not be fast enough for clinical situations where time is of the essence, such as triage in the emergency department or decision support during surgery. More detailed answers need more time [<xref ref-type="bibr" rid="ref3">3</xref>].</p></list-item><list-item><p><italic>Memory footprint:</italic> Models with billions of parameters need from 16 to more than 80 GB of video random access memory (VRAM) for fast inference [<xref ref-type="bibr" rid="ref4">4</xref>]. 
Many health care facilities, especially in low- and middle-income countries, do not have the specialized GPU infrastructure that this requires.</p></list-item><list-item><p><italic>Scalability challenges:</italic> Serving hundreds of concurrent clinical users requires distributed computing architectures and load-balancing strategies not discussed in the review [<xref ref-type="bibr" rid="ref5">5</xref>].</p></list-item></list><p>To advance edge computing and model optimization, we suggest that subsequent research emphasize the following:</p><list list-type="bullet"><list-item><p><italic>Model quantization and pruning:</italic> Techniques to reduce model size by 50%&#x2010;75% with minimal accuracy loss, enabling deployment on consumer-grade hardware.</p></list-item><list-item><p><italic>Edge computing solutions:</italic> Local deployment using optimized models (eg, 7-13B parameter variants) to address data privacy concerns while reducing latency and cloud dependency.</p></list-item><list-item><p><italic>Hybrid architecture:</italic> Combining lightweight edge models for routine queries with cloud-based full models for complex cases, optimizing the accuracy-efficiency trade-off.</p></list-item></list><p>The medical informatics community requires standardized metrics that assess not only diagnostic accuracy but also operations per diagnosis (computational cost), energy consumption per inference (environmental impact), and cost-effectiveness ratios (accuracy gained per dollar of infrastructure). We performed an initial benchmark of three LLMs on differential diagnosis tasks: Clinical Camel (LLaMA-2-13B), PMC-LLaMA 13B, and Meditron-3 (Qwen2.5-14B). 
We found that smaller, domain-specific models (13&#x2010;14 billion parameters, fine-tuned on medical corpora) achieved 85%&#x2010;90% of GPT-4&#x2019;s diagnostic accuracy while using only about 15% of the computational resources, indicating considerable room for improving computational efficiency.</p><p>We call for high-performance computing research in medical artificial intelligence (AI) that supports clinical implementation. This research should set benchmarks for both computational performance and clinical accuracy, develop optimization techniques that are specific to medical inference workloads, create reference architectures for deploying LLMs in different health care settings, and investigate federated learning strategies that enable training without centralizing sensitive patient data.</p><p>The transformative potential Zhang et al describe will only be realized if LLMs can be deployed efficiently and equitably across diverse health care environments. High-performance computing and medical informatics must advance in tandem to bridge the gap between research promise and clinical reality.</p></body><back><ack><p>The authors used the Wordvice.ai service solely to improve the language and semantics of the manuscript.</p></ack><notes><sec><title>Funding</title><p>The authors declared that no financial support was received for this work.</p></sec><sec><title>Data Availability</title><p>The benchmarking data comparing diagnostic accuracy and computational resource utilization of Clinical Camel (LLaMA-2-13B), PMC-LLaMA 13B, and Meditron-3 (Qwen2.5-14B) against a GPT-4 baseline are available from the corresponding author upon reasonable request. The evaluation was conducted on publicly available differential diagnosis case datasets. 
Model access: Clinical Camel and PMC-LLaMA 13B are available via Hugging Face; Meditron-3 (Qwen2.5-14B) is available through the EPFL repository; and GPT-4 was accessed via OpenAI API for comparative benchmarking.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: AGL</p><p>Methodology: JZ</p><p>Validation: YM</p><p>Formal analysis: AGL</p><p>Writing &#x2014; original draft: JZ, YM</p><p>Writing &#x2014; review &#x0026; editing: AGL</p></fn><fn fn-type="conflict"><p>None declared.</p></fn><fn fn-type="other"><p><bold>Editorial Notice</bold></p><p>The corresponding author of &#x201C;Revolutionizing Health Care: The Transformative Impact of Large Language Models in Medicine&#x201D; declined to respond to this letter.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb3">VRAM</term><def><p>video random access memory</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>K</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Revolutionizing health care: the transformative impact of large language models in medicine</article-title><source>J Med Internet Res</source><year>2025</year><month>01</month><day>7</day><volume>27</volume><fpage>e59069</fpage><pub-id pub-id-type="doi">10.2196/59069</pub-id><pub-id pub-id-type="medline">39773666</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Singhal</surname><given-names>K</given-names> </name><name name-style="western"><surname>Azizi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tu</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Large language models encode clinical knowledge</article-title><source>Nature</source><year>2023</year><month>08</month><volume>620</volume><issue>7972</issue><fpage>172</fpage><lpage>180</lpage><pub-id pub-id-type="doi">10.1038/s41586-023-06291-2</pub-id><pub-id pub-id-type="medline">37438534</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSJ</given-names> </name><name name-style="western"><surname>Elangovan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gutierrez</surname><given-names>L</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSW</given-names> </name></person-group><article-title>Large language models in medicine</article-title><source>Nat Med</source><year>2023</year><month>08</month><volume>29</volume><issue>8</issue><fpage>1930</fpage><lpage>1940</lpage><pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id><pub-id pub-id-type="medline">37460753</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raiaan</surname><given-names>MAK</given-names> </name><name name-style="western"><surname>Mukta</surname><given-names>MdSH</given-names> </name><name name-style="western"><surname>Fatema</surname><given-names>K</given-names> 
</name><etal/></person-group><article-title>A review on large language models: architectures, applications, taxonomies, open issues and challenges</article-title><source>IEEE Access</source><year>2024</year><volume>12</volume><fpage>26839</fpage><lpage>26874</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2024.3365742</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>K</given-names> </name><name name-style="western"><surname>Mao</surname><given-names>R</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>A survey of large language models for healthcare: from data, technology, and applications to accountability and ethics</article-title><source>arXiv</source><comment>Preprint posted online in 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2310.05694</pub-id></nlm-citation></ref></ref-list></back></article>