<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e82579</article-id><article-id pub-id-type="doi">10.2196/82579</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Initial Insights Into an Institutional Secure Large Language Model for Magnetic Resonance Imaging Examination Requests: Retrospective Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hallinan</surname><given-names>James Thomas Patrick Decourcy</given-names></name><degrees>MBChB</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Leow</surname><given-names>Naomi Wenxin</given-names></name><degrees>BComp, MComp</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Low</surname><given-names>Yi Xian</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Lee</surname><given-names>Aric</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ong</surname><given-names>Wilson</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chan</surname><given-names>Matthew Ding Zhou</given-names></name><degrees>BmedSc, MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Devi</surname><given-names>Ganakirthana Kalpenya</given-names></name><degrees>MbBchBao</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>He</surname><given-names>Stephanie Shengjie</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Loh</surname><given-names>Daniel De-Liang</given-names></name><degrees>MBBS, MRCS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lim</surname><given-names>Desmond Shi Wei</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Low</surname><given-names>Xi Zhen</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lim</surname><given-names>Mei Chin</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yong</surname><given-names>Clement</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Sng</surname><given-names>Weizhong Jonathan</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Teo</surname><given-names>Ee Chin</given-names></name><degrees>MMRT</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tan</surname><given-names>Jiong Hao</given-names></name><degrees>MBBS, MMed, MRCS</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kumar</surname><given-names>Naresh</given-names></name><degrees>MBBS, MS, DNB, DM</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Makmur</surname><given-names>Andrew</given-names></name><degrees>BmedSc, MBBS, MMed</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ting</surname><given-names>Yonghan</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Diagnostic Imaging, National University Hospital</institution><addr-line>5 Lower Kent Ridge Rd</addr-line><addr-line>Singapore</addr-line><country>Singapore</country></aff><aff id="aff2"><institution>Department of Diagnostic Radiology, Yong Loo Lin School of Medicine, National University of Singapore</institution><addr-line>Singapore</addr-line><country>Singapore</country></aff><aff id="aff3"><institution>Innovation Office, AI Office, National University Health System</institution><addr-line>Singapore</addr-line><country>Singapore</country></aff><aff id="aff4"><institution>National University Spine Institute, Department of Orthopedic Surgery, National University 
Hospital</institution><addr-line>Singapore</addr-line><country>Singapore</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Shivanna</surname><given-names>Abhishek</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Le Guellec</surname><given-names>Bastien</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Odezuligbo</surname><given-names>Ikenna</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to James Thomas Patrick Decourcy Hallinan, MBChB, Department of Diagnostic Imaging, National University Hospital, 5 Lower Kent Ridge Rd, Singapore, 119074, Singapore, 65 6908 2222; <email>james_hallinan@nuhs.edu.sg</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>7</day><month>4</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e82579</elocation-id><history><date date-type="received"><day>25</day><month>08</month><year>2025</year></date><date date-type="rev-recd"><day>20</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>24</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; James Thomas Patrick Decourcy Hallinan, Naomi Wenxin Leow, Yi Xian Low, Aric Lee, Wilson Ong, Matthew Ding Zhou Chan, Ganakirthana Kalpenya Devi, Stephanie Shengjie He, Daniel De-Liang Loh, Desmond Shi Wei Lim, Xi Zhen Low, Mei Chin Lim, Clement Yong, Weizhong Jonathan Sng, Ee Chin Teo, Jiong Hao Tan, Naresh Kumar, Andrew Makmur, Yonghan Ting. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 7.4.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e82579"/><abstract><sec><title>Background</title><p>Incomplete clinical details on magnetic resonance imaging (MRI) examination requests (MERs) can lead to suboptimal protocol selection. An institutional secure large language model (sLLM) with access to manually retrieved salient data from the electronic medical record (EMR) may improve request completeness and protocol accuracy across multiple MRI subspecialties.</p></sec><sec><title>Objective</title><p>The objective of this study was to compare clinician MERs with sLLM-augmented MERs for information quality and to evaluate the protocoling accuracy of the sLLM versus board-certified radiologists across body, musculoskeletal, and neuroradiology MRI.</p></sec><sec sec-type="methods"><title>Methods</title><p>This retrospective study included 608 random outpatient MRI examinations performed between September 2023 and July 2024 (body 206, musculoskeletal 203, neuroradiology 199). The cohort comprised 528 patients (mean 51.2 years, SD 19.2; range 4&#x2010;93; n=279, 52.8% women, n=249, 47.2% men). 
MERs without EMR access were excluded. A privately hosted Anthropic Claude 3.5 model (temperature 0) augmented each MER with manually retrieved salient EMR data and, via rule-based parsing, mapped the extracted elements onto predefined institutional criteria to recommend region or coverage and contrast use. Two experienced radiologists established a consensus reference standard. Two board-certified general radiologists (Rad 3 and Rad 4) and the sLLM were compared with this standard. Clinical information quality was graded using the Reason-for-Exam Imaging Reporting and Data System (RI-RADS). Interrater reliability was quantified with Gwet AC1. Paired accuracies were compared with the McNemar test to determine whether there was a statistically significant difference.</p></sec><sec sec-type="results"><title>Results</title><p>Interreader agreement for RI-RADS was almost perfect for sLLM-augmented MERs (AC1 0.97, 95% CI 0.94&#x2010;0.99) and moderate for clinician MERs (AC1 0.43, 95% CI 0.34&#x2010;0.52). Limited or deficient clinical information (RI-RADS C/D) fell to 0% to 0.7% (0/608 to 4/608) with sLLM augmentation vs 4.1% to 20.4% (25/608 to 124/608) for clinician MERs. Overall protocol accuracy was 93.1% (566/608; 95% CI 89.6&#x2010;96.6) for the sLLM, 91.4% (556/608; 95% CI 87.6&#x2010;95.3) for Rad 3, and 92.1% (560/608; 95% CI 88.4&#x2010;95.8) for Rad 4 (sLLM vs Rad 3: <italic>P</italic>=.23; sLLM vs Rad 4: <italic>P</italic>=.40). Region or coverage accuracy was similar (sLLM: 579/608, 95.2%; Rad 3: 585/608, 96.2%; Rad 4: 573/608, 94.2%; <italic>P</italic>=.46 and <italic>P</italic>=.36). Contrast decisions were more accurate using the sLLM at 94.4% (574/608; 95% CI 91.3&#x2010;97.5) vs Rad 3 at 92.1% (560/608; 95% CI 88.4&#x2010;95.8; <italic>P</italic>=.027) and were not significantly different from Rad 4 at 92.9% (565/608; 95% CI 89.4&#x2010;96.4; <italic>P</italic>=.16). 
Subspecialty analyses showed similar patterns, with the sLLM outperforming Rad 4 for musculoskeletal MRI contrast decisions (96.6% vs 91.1%; <italic>P</italic>=.006) and matching readers elsewhere. Manual review indicated that sLLM improvements arose from EMR details not listed on the MER (infection/inflammation, tumor history, prior surgery). No clinically significant hallucinations were identified in a manual review of discordant cases.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Across body, musculoskeletal, and neuroradiology MRI, sLLM-augmented examination requests improved clinical context and enhanced contrast selection while demonstrating accuracy comparable to general radiologists for region or coverage. Integrating sLLMs into routine vetting workflows may reduce manual workload in protocol selection for more efficient, standardized protocoling.</p></sec></abstract><kwd-group><kwd>secure large language model</kwd><kwd>radiology request form</kwd><kwd>reason for exam imaging reporting and data system</kwd><kwd>musculoskeletal imaging</kwd><kwd>body imaging</kwd><kwd>neuroradiology imaging</kwd><kwd>magnetic resonance imaging</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Radiology examination requests form the primary communication bridge between referring clinicians and radiology services. Completeness and clarity of these requests influence protocol selection, image quality, and ultimately diagnostic and therapeutic outcomes [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. When key clinical elements such as relevant history, current symptoms, or prior imaging are incomplete or absent, radiographers and radiologists may assign a suboptimal protocol or omit intravenous contrast, leading to lower accuracy, repeat studies, and additional cost [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. 
A recent systematic review showed that incomplete clinical information was associated with reduced reporting accuracy, clinical relevance, and reporting confidence [<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>The enhancement of radiology examination requests with data from the electronic medical record (EMR) offers a potential solution. Early rule-based decision-support tools reduced certain categories of inappropriate imaging yet may struggle to accommodate unstructured narratives and updated imaging protocols [<xref ref-type="bibr" rid="ref6">6</xref>]. Natural language processing pipelines improved specificity but required extensive engineering and did not fully interpret clinical context [<xref ref-type="bibr" rid="ref7">7</xref>]. Current large language models (LLMs) can ingest heterogeneous text, interpret medical terminology, and generate structured outputs at scale [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Proof-of-concept work has already demonstrated utility for report generation and national guideline concordance [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Nevertheless, many published studies rely on deidentified vignettes or synthetic notes, limiting direct clinical translation because of privacy constraints.</p><p>Secure LLMs (sLLMs) address these constraints by operating behind institutional firewalls while preventing the transmission of protected health information [<xref ref-type="bibr" rid="ref16">16</xref>]. Several studies have demonstrated their feasibility, including improved adequacy of spine magnetic resonance imaging (MRI) request forms and increased protocol concordance for musculoskeletal studies [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. 
Recent oncologic imaging work further indicates that a GPT-4 system can automatically generate structured clinical histories that radiologists prefer over original clinician-generated requisitions [<xref ref-type="bibr" rid="ref19">19</xref>]. Another recent study on GPT-4-generated MRI protocols showed notable quality in cardiac and neuroradiology imaging [<xref ref-type="bibr" rid="ref20">20</xref>]. An accompanying editorial emphasized that fatigue-free, verifiable LLM summaries could finally bridge the long-recognized information gap between referrers and radiologists, reinforcing the case for secure, institution-hosted models [<xref ref-type="bibr" rid="ref21">21</xref>].</p><p>Building on these findings, we evaluated a secure LLM across all routine MRI subspecialties, including body, neuroradiology, and musculoskeletal examinations. The study had 2 objectives: (1) to compare the information quality of clinician MRI examination requests (MERs) with those augmented by the sLLM using the Reason-for-Exam Imaging Reporting and Data System (RI-RADS) [<xref ref-type="bibr" rid="ref22">22</xref>] and (2) to assess protocol accuracy of the sLLM against both subspecialty and general radiologist readers.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>The Institutional Review Board (Domain-Specific Review Board reference: 2023/00919) classified the project as minimal risk and therefore granted a waiver of informed consent. Patient-identifying details have been omitted to maintain the privacy and confidentiality of patient data. Compensation was not required in view of the minimal risk of the project.</p></sec><sec id="s2-2"><title>Protocoling Pipeline and sLLM Prompt</title><p>Original clinician-submitted outpatient MERs performed between September 2023 and July 2024 were retrieved at random. MERs lacking corresponding EMR information were excluded. 
The musculoskeletal cohort in this study is distinct from, and does not overlap with, the dataset of a previous study by the authors. A target sample of approximately 600 requests (~200 per major subspecialty) was selected to provide adequate statistical power while maintaining operational feasibility across a diverse range of anatomical regions and pathologies. For every patient, a MER was generated by the sLLM based on the clinician request with relevant EMR content.</p><p>The institutional sLLM is a privately hosted instance of Anthropic Claude 3.5 on Amazon Bedrock. Model temperature was fixed at 0 to minimize output variability. For each request, the sLLM received (1) the most recent relevant clinical entry, identified manually by the authors based on the clinical service indicated on the request forms, and (2) pertinent prior imaging reports (eg, computed tomography [CT] of the neck for an MRI nasopharynx study). The typical input length of the clinical notes provided to the sLLM ranged from around 100 words for routine or straightforward presentations to around 3000 words for patients with complex oncological or multisystem disease. The sLLM was also given the latest institutional MRI protocol repository (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
No clinical data left the institutional firewall.</p><p>Two-step sLLM protocoling was performed:</p><list list-type="order"><list-item><p>Information enrichment&#x2014;the sLLM extracted key findings, working diagnoses, and potential MRI safety issues from both the MER and the EMR, returning a concise justification summary.</p></list-item><list-item><p>Protocol assignment&#x2014;using institutional rules and the enriched summary, the model selected the optimal region/coverage and determined whether contrast was required.</p></list-item></list><p>The estimated processing cost using the sLLM was US $3 per 1,000,000 tokens for the input and US $15 per 1,000,000 tokens for the output, giving a total cost of US $0.024 to US $0.033 per request. For the cases requiring longer input length (about 5000 tokens), the cost remained low at a few cents per request.</p><p>Importantly, the sLLM&#x2019;s primary role in this pipeline was clinical information extraction, that is, summarizing clinically relevant details from the MER and EMR (eg, prior surgery or suspected infection), and contextual interpretation. The final contrast and region decisions were not made autonomously by the sLLM. 
Instead, the extracted elements were subsequently mapped onto predefined institutional criteria for region or coverage and contrast use using a deterministic rule-based parsing script.</p><p>Contrast (gadolinium) administration was considered when any of the following were present:</p><list list-type="order"><list-item><p>History or suspicion of tumor, malignancy, or focal lesion</p></list-item><list-item><p>Known or suspected infective or inflammatory condition</p></list-item><list-item><p>Injury to neural structures</p></list-item><list-item><p>Previous surgery or spinal/extremity instrumentation</p></list-item><list-item><p>Explicit clinician request</p></list-item></list><p>A custom parsing script evaluated the sLLM response, which listed a &#x201C;yes&#x201D; or &#x201C;no&#x201D; decision for each contrast-relevant category together with the reason when applicable. If the script detected at least one positive flag and renal failure or other contraindications had not been recognized, the examination was classified as &#x201C;contrast required.&#x201D;</p><p>Detailed prompt wording and rule logic are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The rule definitions in this study were refined through prior work on LLMs in spine and musculoskeletal protocoling. All existing MRI protocols (including prompt design and contrast rules) were carefully reviewed with subspecialty leads and key team members to ensure alignment with institutional practice before doing the formal analysis. The full institutional MRI protocol table had a size compatible with the available context window and was embedded directly within the prompt, obviating the need for retrieval-augmented generation (RAG) in this study. 
No cases from the present study were used during prompt development.</p></sec><sec id="s2-3"><title>Evaluation Procedure</title><p>Two senior radiologists (Rad 1: 14 years of experience; Rad 2: 12 years of experience) independently graded the clinical adequacy of both clinician and sLLM MERs using the RI-RADS classification (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). They were blinded to the origin of each form.</p><p>Rad 1 and Rad 2 provided a consensus reference standard for protocoling. Against this consensus reference standard, protocol selections generated by the sLLM were then compared with those from 2 board-certified general radiologists (Rad 3 and Rad 4), who also had access to the same EMR entries, pertinent prior imaging reports, and institutional MRI protocols. The pertinent prior imaging reports were manually selected by study members, who were individuals distinct from Rad 1 and Rad 2. Both Rad 3 and Rad 4 had 2 years of experience and were commonly tasked with providing protocols based on MRI examination requests. Accuracy was credited only when both the suggested region or coverage and the contrast decision matched the consensus reference standard. Additional subanalyses were performed for each component and the major subspecialties. A manual review of disagreements between the sLLM and board-certified general radiologists against the consensus reference standard was carried out by the senior radiologists. Any hallucinations by the sLLM were noted, with clinically significant hallucinations defined as the sLLM producing clinically relevant details absent from the provided input data and capable of influencing protocol decisions [<xref ref-type="bibr" rid="ref23">23</xref>].</p></sec><sec id="s2-4"><title>Statistical Analysis</title><p>All computations were performed with Python 3.9.12. 
Two-sided tests were considered significant at <italic>P</italic>&#x003C;.05.</p><p>The quality of each MER was rated on a 4-level scale (RI-RADS grades A-D) by Rad 1 and Rad 2. Interreader agreement for these ordinal ratings, and later for MRI protocoling decisions (region/coverage and contrast requirement), was quantified with Gwet AC1, which is less sensitive than Cohen &#x03BA; to category imbalance (eg, the predominance of A/B grades) [<xref ref-type="bibr" rid="ref24">24</xref>]. Agreement was interpreted as poor (&#x003C;0), slight (0&#x2010;0.20), fair (0.21&#x2010;0.40), moderate (0.41&#x2010;0.60), substantial (0.61&#x2010;0.80), or almost perfect (0.81&#x2010;1). With more than 600 examinations, the study had greater than 90% power to detect a minimum 15% absolute difference in the proportion of clinically adequate MERs between clinician and sLLM versions.</p><p>Protocol selections generated by the sLLM and by 2 board-certified general radiologists (Rad 3 and Rad 4) were compared with the consensus reference standard established by Rad 1 and Rad 2. Overall accuracy was defined as the proportion of cases in which both region or coverage and contrast indication matched the reference standard. Differences in paired accuracies (sLLM vs each radiologist) were evaluated using the McNemar test with Yates continuity correction. Separate analyses were undertaken for region alone and contrast alone, and across the 3 major subspecialties (body, musculoskeletal, and neuroradiology). Ninety-five percent CIs for RI-RADS gradings (quality of clinical information) and protocoling were derived from the normal approximation to the binomial distribution. For proportions near 0% or 100%, the normal approximation to the binomial can be inaccurate. 
Therefore, for low-event RI-RADS C/D proportions (eg, sLLM MERs), 95% CIs were computed using the Clopper-Pearson exact method rather than the normal approximation.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Patient Demographics and Consensus MRI Protocols</title><p>Overall, 608 MRI examination requests were collected from 528 patients (mean 51.2 y, SD 19.2; range 4&#x2010;93 y). In total, 52.8% (279/528) patients were women and 47.2% (249/528) were men (<xref ref-type="table" rid="table1">Table 1</xref>). An additional 27 MRI examinations (26 patients) were excluded from analysis due to incomplete EMRs (eg, external referrals) (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Patient and magnetic resonance imaging examination characteristics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics</td><td align="left" valign="bottom">Values</td></tr></thead><tbody><tr><td align="left" valign="bottom" colspan="2">Age (y), mean (SD; range)</td></tr><tr><td align="left" valign="top">&#x2003;All (N=528)</td><td align="left" valign="top">51.2 (SD 19.2; 4-93)</td></tr><tr><td align="left" valign="top">&#x2003;Women (n=279)</td><td align="left" valign="top">52.9 (SD 17.4; 4-93)</td></tr><tr><td align="left" valign="top">&#x2003;Men (n=249)</td><td align="left" valign="top">49.3 (SD 21; 5-92)</td></tr><tr><td align="left" valign="top" colspan="2">MRI<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> study specialties (N=608), n (%)</td></tr><tr><td align="left" valign="top" colspan="2">&#x2003;Body MRI (n=206)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rectum/perineum fistula</td><td align="left" 
valign="top">28 (13.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Uterus/cervix cancer</td><td align="left" valign="top">26 (12.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Liver routine</td><td align="left" valign="top">21 (10.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Prostate routine</td><td align="left" valign="top">21 (10.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Enterography</td><td align="left" valign="top">16 (7.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Pancreas routine</td><td align="left" valign="top">14 (6.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">80 (38.8)</td></tr><tr><td align="left" valign="top" colspan="2">&#x2003;Musculoskeletal MRI (n=203)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lumbar spine</td><td align="left" 
valign="top">25 (12.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Cervical spine</td><td align="left" valign="top">24 (11.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Shoulder</td><td align="left" valign="top">29 (14.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Knee</td><td align="left" valign="top">27 (13.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Pelvis</td><td align="left" valign="top">11 (5.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">87 (42.9)</td></tr><tr><td align="left" valign="top" colspan="2">&#x2003;Neuroradiology MRI (n=199)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Brain + contrast</td><td align="left" valign="top">40 (20.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Orbits</td><td align="left" valign="top">18 (9)</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Skull base/temporal bones</td><td align="left" valign="top">15 (7.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Brain routine (noncontrast)</td><td align="left" valign="top">15 (7.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Brain stroke (acute)</td><td align="left" valign="top">14 (7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Brain MR angiography</td><td align="left" valign="top">12 (6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Pituitary dynamic</td><td align="left" valign="top">12 (6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Oral cavity/neck</td><td align="left" valign="top">12 (6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Nasopharynx and neck</td><td align="left" valign="top">10 (5)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">51 (25.6)</td></tr><tr><td align="left" valign="top" colspan="2">MRI study type (N=608), n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Routine (noncontrast)</td><td align="left" valign="top">239 (39.3)</td></tr><tr><td align="left" valign="top">&#x2003;Contrast</td><td align="left" valign="top">369 (60.7)</td></tr><tr><td align="left" valign="top" colspan="2">Contrast use by specialty</td></tr><tr><td align="left" valign="top">&#x2003;Body MRI</td><td align="left" valign="top">Contrast 175 (85); noncontrast 31 (15)</td></tr><tr><td align="left" valign="top">&#x2003;Musculoskeletal MRI</td><td align="left" valign="top">Contrast 55 (27); noncontrast 148 (73)</td></tr><tr><td align="left" valign="top">&#x2003;Neuroradiology MRI</td><td align="left" valign="top">Contrast 139 (70); noncontrast 60 (30)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>MRI: magnetic resonance imaging.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Study design flowchart. The initial clinician-generated MERs were extracted and then augmented using an sLLM. The clinician and sLLM MERs were compared for the quality of the clinical information available using the RI-RADS grading scale by 2 experienced radiologists (Rads 1 and 2). The MRI protocol accuracy for the sLLM and 2 board-certified radiologists (Rads 3 and 4, both with 2 years of experience) was determined by comparison against a reference standard provided by Rads 1 and 2. Claude version 3.5 (Anthropic) was used. *Clinical data included the last clinical entry and imaging reports (eg, computed tomography (CT) of the neck for an MRI nasopharynx). 
The sLLM and all board-certified radiologists had access to the MRI protocol guidance provided on the hospital intranet. MERs: MRI examination requests; MRI: magnetic resonance imaging; RI-RADS: Reason-for-Exam Imaging Reporting and Data System; sLLM: secure large language model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e82579_fig01.png"/></fig><p>Out of the 608 MRI examinations analyzed, there was the following subspecialty breakdown: 206 (34%) body, 203 (33%) musculoskeletal, and 199 (33%) neuroradiology. Contrast was administered in 85% (175/206) of body scans, 27% (55/203) of musculoskeletal scans, and 70% (139/199) of neuroradiology scans, yielding 60.7% (369/608) of contrast studies overall. Within each specialty, the most common protocols were rectum or perineal fistula, uterus or cervix cancer, liver, and prostate studies for body MRI; lumbar and cervical spine along with knee and shoulder examinations for musculoskeletal MRI; and postcontrast brain, orbital, and skull-base studies for neuroradiology MRI (<xref ref-type="table" rid="table1">Table 1</xref>).</p></sec><sec id="s3-2"><title>Adequacy of the Radiology Request Forms</title><p>RI-RADS gradings for the MERs were performed independently by 2 experienced radiologists (Rad 1 and Rad 2) (<xref ref-type="table" rid="table2">Table 2</xref>). 
Interobserver agreement (AC1) was almost perfect for the sLLM-augmented MERs (AC1 0.97, 95% CI 0.94&#x2010;0.99) and moderate for the clinician MERs (AC1 0.43, 95% CI 0.34&#x2010;0.52).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Reason for exam imaging reporting and data system grades for the clinician and secure large language model&#x2013;augmented magnetic resonance imaging examination requests<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">RI-RADS<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> grade</td><td align="left" valign="bottom" colspan="2">Radiologist 1</td><td align="left" valign="bottom" colspan="2">Radiologist 2</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Clinician MERs<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup> (n=608), n (%, 95% CI)</td><td align="left" valign="bottom">sLLM<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup> MERs (n=608), n (%, 95% CI)</td><td align="left" valign="bottom">Clinician MERs (n=608), n (%, 95% CI)</td><td align="left" valign="bottom">sLLM MERs (n=608), n (%, 95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">A/B</td><td align="left" valign="top">484 (79.6, 95% CI 76.2&#x2010;82.8)</td><td align="left" valign="top">604 (99.3, 95% CI 98.3&#x2010;99.8)</td><td align="left" valign="top">583 (95.9, 95% CI 93.9&#x2010;97.3)</td><td align="left" valign="top">608 (100, 95% CI 99.51&#x2010;100)</td></tr><tr><td align="left" valign="top">C/D</td><td align="left" valign="top">124 (20.4, 95% CI 17.2&#x2010;23.6)</td><td align="left" valign="top">4 (0.7, exact 95% CI, Clopper-Pearson 0.2&#x2010;1.7)</td><td align="left" valign="top">25 (4.1, 95% CI 2.7&#x2010;6)</td><td align="left" valign="top">0 (0, exact 95% CI, Clopper-Pearson 0&#x2010;0.5)</td></tr></tbody></table><table-wrap-foot><fn 
id="table2fn1"><p><sup>a</sup>Values are the number of studies, with 95% CIs in brackets.</p></fn><fn id="table2fn2"><p><sup>b</sup>RI-RADS: Reason-for-Exam Imaging Reporting and Data System.</p></fn><fn id="table2fn3"><p><sup>c</sup>MERs: magnetic resonance imaging examination requests.</p></fn><fn id="table2fn4"><p><sup>d</sup>sLLM: secure large language model.</p></fn></table-wrap-foot></table-wrap><p>Clinical information on the sLLM-augmented MERs was rated significantly higher than the original requests by both radiologists. For Rad 1, clinician MERs had 484/608 rated A/B (79.6%; 95% CI 76.2%&#x2010;82.8%) and 124/608 rated C/D (20.4%; 95% CI 17.2%&#x2010;23.6%), whereas sLLM MERs had 604/608 rated A/B (99.3%; 95% CI 98.3%&#x2010;99.8%) and 4/608 rated C/D (0.7%; exact 95% CI, Clopper-Pearson, 0.2%&#x2010;1.7%). For Rad 2, clinician MERs had 583/608 rated A/B (95.9%; 95% CI 93.9%&#x2010;97.3%) and 25/608 rated C/D (4.1%; 95% CI 2.7%&#x2010;6.0%), while sLLM MERs had all 608/608 rated A/B (100%; 95% CI 99.51%&#x2010;100%) and 0/608 rated C/D (0%; exact 95% CI, Clopper-Pearson, 0%&#x2010;0.5%).</p><p>No clinically significant hallucinations were identified in the sLLM outputs. Notably, the sLLM-generated MERs converted 28 spine MRI referrals (28/608, 4.6%) that were rated limited or deficient (7 of which omitted prior spinal surgery) into requests containing adequate clinical details.</p></sec><sec id="s3-3"><title>MRI Protocoling Accuracy Comparison</title><p>For protocoling, experienced radiologists 1 and 2 showed almost perfect agreement on the region (AC1 0.97, 95% CI 0.96&#x2010;0.99) and need for contrast (AC1 0.99, 95% CI 0.99&#x2010;1). A consensus protocol was determined for all 608 cases.</p><p>For all 608 MRI examinations, the sLLM matched the subspecialty reference standard in 566 (93.1%, 95% CI 89.6&#x2010;96.6) cases overall, compared with 556 (91.4%, 95% CI 87.6&#x2010;95.3) for Rad 3 and 560 (92.1%, 95% CI 88.4&#x2010;95.8) for Rad 4 (<xref ref-type="table" rid="table3">Table 3</xref>). 
This difference was not significant for the sLLM vs Rad 3 (<italic>P</italic>=.23) and Rad 4 (<italic>P</italic>=.40). Accuracy for region or coverage selection was similar across readers (sLLM: 579/608, 95.2%; Rad 3: 585/608, 96.2%; Rad 4: 573/608, 94.2%) with no significant differences for the sLLM vs Rad 3 (<italic>P</italic>=.46) or Rad 4 (<italic>P</italic>=.36). The sLLM, coupled with the use of the deterministic rules-based parsing script, demonstrated a slight advantage for contrast determination, being correct in 94.4% (574/608; 95% CI 91.3&#x2010;97.5) of studies vs 92.1% (560/608; 95% CI 88.4&#x2010;95.8) for Rad 3 (<italic>P</italic>=.027) and 92.9% (565/608; 95% CI 89.4&#x2010;96.4) for Rad 4 (<italic>P</italic>=.16). Manual review of disagreements indicated that the sLLM correctly detected infection or inflammation in 2 cases (eg, suspected bursitis around the hip and shoulder), possible nerve lesions in 2 cases, and prior spine surgical details in 1 case, which were documented in the electronic record but not stated on the MER. 
These details triggered appropriate contrast recommendations that were omitted by at least 1 radiologist.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Magnetic resonance imaging protocoling accuracy for the secure large language model and board-certified radiologists vs the reference standard.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">MRI<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> protocol vs reference standard</td><td align="left" valign="bottom">Radiologist 3</td><td align="left" valign="bottom">Radiologist 4</td><td align="left" valign="bottom">sLLM<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">All MRI studies (N=608), n (%, 95% CI)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Overall protocol correct</td><td align="left" valign="top">556 (91.4, 95% CI 87.6&#x2010;95.3)</td><td align="left" valign="top">560 (92.1, 95% CI 88.4&#x2010;95.8)</td><td align="left" valign="top">566 (93.1, 95% CI 89.6&#x2010;96.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Region correct</td><td align="left" valign="top">585 (96.2, 95% CI 93.6&#x2010;98.8)</td><td align="left" valign="top">573 (94.2, 95% CI 91.1&#x2010;97.4)</td><td align="left" valign="top">579 (95.2, 95% CI 92.3&#x2010;98.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Contrast decision correct</td><td align="left" valign="top">560 (92.1, 95% CI 88.4&#x2010;95.8)</td><td align="left" valign="top">565 (92.9, 95% CI 89.4&#x2010;96.4)</td><td align="left" valign="top">574 (94.4, 95% CI 91.3&#x2010;97.5)</td></tr><tr><td align="left" valign="top" colspan="4">Subspecialty 
subsets</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Musculoskeletal (n=203), n (%, 95% CI)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Overall protocol correct</td><td align="left" valign="top">186 (91.6, 95% CI 87.8&#x2010;95.4)</td><td align="left" valign="top">180 (88.7, 95% CI 84.3&#x2010;93)</td><td align="left" valign="top">187 (92.1, 95% CI 88.4&#x2010;95.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Region correct</td><td align="left" valign="top">199 (98, 95% CI 96.1&#x2010;99.9)</td><td align="left" valign="top">182 (89.7, 95% CI 85.5&#x2010;93.8)</td><td align="left" valign="top">194 (95.6, 95% CI 92.7&#x2010;98.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Contrast decision correct</td><td align="left" valign="top">190 (93.6, 95% CI 90.2&#x2010;97)</td><td align="left" valign="top">185 (91.1, 95% CI 87.2&#x2010;95)</td><td align="left" valign="top">196 (96.6, 95% CI 94&#x2010;99.1)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Neuroradiology (n=199), n (%, 95% CI)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Overall protocol correct</td><td align="left" valign="top">184 (92.5, 95% CI 
88.8&#x2010;96.1)</td><td align="left" valign="top">189 (95, 95% CI 91.9&#x2010;98)</td><td align="left" valign="top">185 (93, 95% CI 89.4&#x2010;96.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Region correct</td><td align="left" valign="top">189 (95, 95% CI 91.9&#x2010;98)</td><td align="left" valign="top">194 (97.5, 95% CI 95.3&#x2010;99.7)</td><td align="left" valign="top">190 (95.5, 95% CI 92.6&#x2010;98.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Contrast decision correct</td><td align="left" valign="top">184 (92.5, 95% CI 88.8&#x2010;96.1)</td><td align="left" valign="top">189 (95, 95% CI 91.9&#x2010;98)</td><td align="left" valign="top">184 (92.5, 95% CI 88.8&#x2010;96.1)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Body (n=206), n (%, 95% CI)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Overall protocol correct</td><td align="left" valign="top">186 (90.3, 95% CI 86.2&#x2010;94.3)</td><td align="left" valign="top">191 (92.7, 95% CI 89.2&#x2010;96.3)</td><td align="left" valign="top">194 (94.2, 95% CI 91&#x2010;97.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Region correct</td><td align="left" valign="top">197 (95.6, 95% CI 92.8&#x2010;98.4)</td><td align="left" valign="top">197 (95.6, 95% CI 
92.8&#x2010;98.4)</td><td align="left" valign="top">195 (94.7, 95% CI 91.6&#x2010;97.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Contrast decision correct</td><td align="left" valign="top">186 (90.3, 95% CI 86.2&#x2010;94.3)</td><td align="left" valign="top">191 (92.7, 95% CI 89.2&#x2010;96.3)</td><td align="left" valign="top">194 (94.2, 95% CI 91&#x2010;97.4)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>MRI: magnetic resonance imaging.</p></fn><fn id="table3fn2"><p><sup>b</sup>sLLM: secure large language model.</p></fn></table-wrap-foot></table-wrap><p>Within the musculoskeletal subset (N=203 examinations), overall agreement with the reference standard was 187 (92.1%, 95% CI 88.4&#x2010;95.8) for the sLLM, 186 (91.6%, 95% CI 87.8&#x2010;95.4) for Rad 3, and 180 (88.7%, 95% CI 84.3&#x2010;93) for Rad 4. These differences were not significant (<italic>P</italic>&#x003E;.99 and <italic>P</italic>=.15 for sLLM vs Rad 3 and Rad 4, respectively). The sLLM matched Rad 3 for region accuracy (n=194, 95.6% vs 199, 98%; <italic>P</italic>=.23) and exceeded Rad 4 (n=182, 89.7%; <italic>P</italic>=.018) (<xref ref-type="table" rid="table3">Table 3</xref>). For contrast, coupled with the use of the deterministic rules-based parsing script, the sLLM was correct in 196 (96.6%, 95% CI 94&#x2010;99.1) studies, outperforming Rad 4 alone (n=185, 91.1%; <italic>P</italic>=.006) with no significant difference compared to Rad 3 (n=190, 93.6%; <italic>P</italic>=.099).</p><p>For body MRI (N=206 examinations), the sLLM had the highest overall accuracy (n=194, 94.2%, 95% CI 91&#x2010;97.4), although this was not significantly different vs Rad 3 (n=186, 90.3%; <italic>P</italic>=.089) and Rad 4 (n=191, 92.7%; <italic>P</italic>=.33). 
Review of body MRI discrepancies indicated that the sLLM recommended more focused uterus or cervix and MRI rectum and perineum protocols in 2 cases compared to a more general pelvis protocol selected by the 2 radiologists. For contrast, the sLLM had the highest accuracy (n=194, 94.2%, 95% CI 91&#x2010;97.4), although this was not significantly different vs Rad 3 (n=186, 90.3%; <italic>P</italic>=.09) and Rad 4 (n=191, 92.7%; <italic>P</italic>=.33).</p><p>In neuroradiology MRI (N=199 examinations), Rad 4 achieved the highest overall accuracy (n=189, 95%, 95% CI 91.9&#x2010;98), although this was not significant vs the sLLM (n=185, 93%; <italic>P</italic>=.50) and Rad 3 (n=184, 92.5%; <italic>P</italic>=.71). Review of neuroradiology discrepancies indicated that the sLLM recommended skull-base protocols in 5 cases with suspected cranial nerve pathologies based on the clinical notes, whereas radiologists 3 and 4 accepted the original brain request when they deemed that coverage was adequate. For contrast, Rad 4 achieved the highest accuracy (n=189, 95%, 95% CI 91.9&#x2010;98), although this was not significant vs the sLLM (n=184, 92.5%; <italic>P</italic>=.30) and Rad 3 (n=184, 92.5%; <italic>P</italic>=.32), which were closely aligned.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this study, we compared MERs provided by the referring clinician with those augmented by our institutional sLLM. Our results show that an sLLM can upgrade the clinical usefulness of MERs and aid protocol selection across body, neuroradiology, and musculoskeletal practice. 
After augmentation, fewer than 1% of requests (0%&#x2010;0.7%) were graded deficient or limited (RI-RADS C or D), compared with up to 1 in 5 (4.1%&#x2010;20.4%) of the clinician originals, and interreader agreement rose from moderate to almost perfect.</p><p>The sLLM protocoling accuracy was compared with that of 2 board-certified junior general radiologists. Overall protocol accuracy of the sLLM (566/608, 93.1%) was close to that of the 2 board-certified radiologists (Rad 3: 556/608, 91.4%; <italic>P</italic>=.23 and Rad 4: 560/608, 92.1%; <italic>P</italic>=.40), although the study was not powered to test statistical noninferiority or equivalence. Observed accuracy differences should not be interpreted as evidence of statistical similarity but instead as descriptive comparisons indicating that the sLLM operated within the performance range of general radiologists on this dataset.</p><p>For contrast decisions, the sLLM demonstrated accuracy of 94.4% (574/608), which was superior to Rad 3 (560/608, 92.1%; <italic>P</italic>=.027) and was not significantly different from Rad 4 (565/608, 92.9%; <italic>P</italic>=.16). Manual review showed that slightly increased accuracy for the sLLM was driven by EMR information (correct detection of prior surgery, infection, or tumor history) that did not appear on the MER yet was important for protocol selection. In our pipeline, the sLLM&#x2019;s role was to extract and summarize these clinically relevant details, while the final region and contrast decisions were made by a deterministic rule-based script using predefined institutional criteria. Importantly, protocol selection and contrast determination are treated as separate decision steps. Protocol names reflect the anatomical region and clinical indication, while contrast administration may be modified when an explicit clinician request for noncontrast imaging or a documented contraindication is detected. 
In such cases, the default contrast setting associated with a protocol may be overridden to preserve clinician intent and patient safety. As such, the observed accuracy reflects improved information extraction feeding into consistent rules, rather than autonomous clinical reasoning by the sLLM.</p><p>In one of this study&#x2019;s cases, the clinician requested noncontrast MRI for cervical cancer recurrence due to renal impairment of estimated glomerular filtration rate 31&#x2010;48, although modern guidelines may permit the use of group II gadolinium agents at this level. We acknowledge that prioritizing clinician intent in such cases may risk suboptimal diagnostic utility and potential recall.</p><p>Most mismatches against the reference standard occurred in borderline cases where more than one subspecialty-specific protocol was reasonable. In body MRI, this typically involved the sLLM proposing a general pelvis study when radiologists selected a focused protocol such as prostate, uterus or cervix, or rectum. In spine MRI, a small number of discrepancies arose from consolidating concurrent cervical and lumbar requests into a single-region study. In neuroradiology, differences usually reflected choosing between brain vs skull base or temporal bone coverage in suspected cranial nerve pathology. These patterns suggest that minor refinements to protocol-selection rules could help reduce discrepancies.</p></sec><sec id="s4-2"><title>Comparison to Prior Work</title><p>These findings are consistent with recent evaluations of LLMs for noninterpretive tasks in radiology and add multisubspecialty evidence. Prior institutional studies using an sLLM for spine and musculoskeletal MRI reported similar gains, with musculoskeletal protocoling accuracy reaching 96% and exceeding 2 general readers at 88% and 89% [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. 
In another recent study, &#x00C7;amur et al [<xref ref-type="bibr" rid="ref25">25</xref>] showed that 4 LLMs have strong potential for selecting appropriate imaging modalities. The 4 LLMs were tested on 240 clinical cases (120 ACR Appropriateness Criteria and 120 realistic scenarios), and their choices were compared with those of 4 clinicians and 4 radiologists. The best model picked the correct imaging test in 98.3% (236/240) of ACR cases, matched a junior radiologist on realistic cases, gave identical answers across prompts (&#x03BA;=1), and showed moderate to good reproducibility over time (&#x03BA;~0.77&#x2010;0.89 short term; 0.51&#x2010;0.79 long term) [<xref ref-type="bibr" rid="ref25">25</xref>]. Similarly, for CT protocol assignment, a fine-tuned support tool picked the right protocol on the first choice 92.3% of the time and within its top two 96.3% of the time, with an average processing time of less than 1 second per case. When clinicians used it, resident accuracy improved from 0.913 to 0.936 with a 14% reduction in reading time, and attending accuracy increased from 0.920 to 0.926 with a 12% time saving [<xref ref-type="bibr" rid="ref26">26</xref>]. In musculoskeletal MRI, a GPT-4 system linked to a small knowledge base reached 92.86% accuracy on ACR-based cases, outperformed a baseline model and standard GPT-4, matched most subspecialists, and was better at flagging when the clinical information was insufficient, which mirrors our MER enrichment step [<xref ref-type="bibr" rid="ref27">27</xref>]. Beyond protocol choice, a radiology operations study showed GPT-4 routed 96% of in-scope procedure requests and 76% of out-of-scope requests correctly at a cost of approximately US $0.03 per request, indicating potential for increased efficiency and cost benefits for routing workflows [<xref ref-type="bibr" rid="ref28">28</xref>]. 
Together with our results, these data suggest that institution-hosted sLLMs could aid improvements in clinical context, standardize protocol decisions, reduce unnecessary contrast and radiation, and potentially save time and costs by reducing protocoling time and reschedules.</p></sec><sec id="s4-3"><title>Limitations and Future Directions</title><p>Our study has several limitations. First, this was a single-center, retrospective evaluation involving only outpatient MERs. The sLLM pipeline relied on the specific institutional MRI protocol repository and access to the EMR structure, and the realized gains may differ in institutions using different order-entry systems or protocol libraries. Future work should include prospective, multicenter evaluations that measure operational outcomes (time to scan, rescheduling, repeat imaging), cost-effectiveness analyses that incorporate staff time and scanner utilization, and extension of the sLLM pipeline to CT and ultrasound.</p><p>Second, some patients contributed multiple examinations, and the dataset lacked patient-linked identifiers; therefore, cluster-robust or mixed-effects analyses were not feasible. Results should be interpreted at the examination level, with future work planned to incorporate patient-level clustering.</p><p>Third, formatting differences between the clinician and augmented MRI examination requests could have partially unblinded the graders, with the potential to artificially inflate the sLLM&#x2019;s RI-RADS scores, although the direction of any resulting bias is uncertain. In our study, the output format (subheadings; eg, clinical history, reason for exam) was identical for the clinician and sLLM forms, although differences in language and style may have unblinded the reviewers. 
This is difficult to rectify but could be addressed in future studies using standardized outputs.</p><p>Fourth, the exclusion of cases with incomplete electronic records introduces a potential selection bias, as these omitted cases may represent some of the more challenging instances for protocoling. In addition, the manual extraction of the latest relevant clinical entry and pertinent prior imaging reports without an automated failsafe mechanism was a methodological simplification that differs from real-life human EMR review, which involves searching through all relevant notes and prior imaging. Together, these could potentially inflate the sLLM&#x2019;s protocoling accuracy and are an important area for future improvement. Future scaling may also benefit from approaches such as hierarchical summarization or RAG-style chunking for very large EMR entries.</p><p>Fifth, we assessed board-certified radiologists who demonstrated high accuracies for protocoling (&#x2265;91.4%), leaving little room for the sLLM to improve on this. LLM assistance for less experienced readers, including technologists, could provide more value and will need to be assessed. Nonetheless, even if accuracy gains over experts are marginal, the standardization benefit and fatigue reduction offered by the sLLM support its deployment. A further improvement may include an &#x201C;assisted radiologist with sLLM&#x201D; arm to separate automation from augmentation.</p><p>Sixth, although the study included more than 600 MRI examinations, the sample size was powered for detecting a 15% difference in clinical-information adequacy, not for establishing noninferiority or equivalence in protocol accuracy. 
The small absolute differences observed between the sLLM and board-certified radiologists (eg, 93.1% vs 91.4%; <italic>P</italic>=.23) therefore cannot be interpreted as statistical equivalence.</p><p>Seventh, we deliberately confined the evaluation to MRI, which is our most protocol-intensive modality and the largest component of departmental workload, but acknowledge that CT and ultrasound will require separate validation prior to broader application.</p><p>Eighth, the review for clinically significant hallucinations was limited to discordant cases, and there is a theoretical risk that the sLLM could generate benign or &#x201C;silent&#x201D; hallucinations in the clinical summary that do not alter the protocol decision but could still degrade the medical record.</p><p>Finally, although no clinically significant hallucinations were observed, it is well known that LLMs are prone to producing factual errors that could have major clinical implications [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. In addition, recent work shows that LLMs can be vulnerable to adversarial hallucination in clinical decision-support settings, with mitigation prompts only partially reducing errors and lower temperature settings providing no meaningful benefit [<xref ref-type="bibr" rid="ref31">31</xref>]. 
Continuous monitoring and postdeployment guardrails therefore remain essential, but were beyond the scope of this study.</p></sec><sec id="s4-4"><title>Conclusion</title><p>In this multisubspecialty MRI cohort, an sLLM improved MRI examination request completeness and demonstrated protocol accuracy comparable to that of board-certified radiologists; however, the study was not powered to establish noninferiority or equivalence for protocol accuracy.</p><p>The end-to-end sLLM pipeline, consisting of request form enrichment with EMR data and a rule-based protocoling technique, offers a practical pathway for more efficient, standardized protocoling while reducing the administrative burden on clinicians.</p></sec></sec></body><back><ack><p>The author(s) attest that there was no use of generative artificial intelligence technologies in the generation of text, figures, or other informational content of this manuscript.</p></ack><notes><sec><title>Funding</title><p>This study received support from the Singapore Ministry of Health National Medical Research Council under the NMRC Clinician Innovator Award (CIA). 
The grant was awarded for the project titled &#x201C;From Prototype to Full Deployment: A Comprehensive Deep Learning Pipeline for Whole-Spine MRI&#x201D; (Grant ID: CIAINV25jan-0005 J.T.P.D.H).</p></sec><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: JTPDH, NWL, YXL</p><p>Data curation: JTPDH, NWL</p><p>Formal analysis: JTPDH, NWL</p><p>Funding acquisition: JTPDH</p><p>Investigation: JTPDH, NWL, YXL</p><p>Methodology: JTPDH, NWL, YXL</p><p>Project administration: JTPDH, NWL, YXL</p><p>Resources: JTPDH, NWL, YXL, AL, WO, MDZC, GKD, XZL, MCL, CY, WJS, ECT, JHT, NK, AM, YT</p><p>Software: JTPDH, NWL, YXL, AL, WO, MDZC, GKD, XZL, MCL, CY, WJS, ECT, JHT, NK, AM, YT</p><p>Supervision: JTPDH</p><p>Validation: JTPDH, NWL, YXL, AL, WO, MDZC, GKD, XZL, MCL, CY, WJS, ECT, JHT, NK, AM, YT</p><p>Visualization: JTPDH, NWL, YXL</p><p>Writing &#x2013; original draft: JTPDH, NWL, YXL</p><p>Writing &#x2013; reviewing &#x0026; editing: JTPDH, NWL, YXL, AL, WO, MDZC, GKD, XZL, MCL, CY, WJS, ECT, JHT, NK, AM, YT</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">EMR</term><def><p>electronic medical record</p></def></def-item><def-item><term id="abb2">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb3">MERs</term><def><p>MRI examination requests</p></def></def-item><def-item><term id="abb4">MRI</term><def><p>magnetic resonance imaging</p></def></def-item><def-item><term id="abb5">RAG</term><def><p>retrieval-augmented generation</p></def></def-item><def-item><term id="abb6">RI-RADS</term><def><p>Reason-for-Exam Imaging Reporting and Data System</p></def></def-item><def-item><term id="abb7">sLLM</term><def><p>secure large language 
model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lehnert</surname><given-names>BE</given-names> </name><name name-style="western"><surname>Bree</surname><given-names>RL</given-names> </name></person-group><article-title>Analysis of appropriateness of outpatient CT and MRI referred from primary care clinics at an academic medical center: how critical is the need for improved decision support?</article-title><source>J Am Coll Radiol</source><year>2010</year><month>03</month><volume>7</volume><issue>3</issue><fpage>192</fpage><lpage>197</lpage><pub-id pub-id-type="doi">10.1016/j.jacr.2009.11.010</pub-id><pub-id pub-id-type="medline">20193924</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bernardy</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ullrich</surname><given-names>CG</given-names> </name><name name-style="western"><surname>Rawson</surname><given-names>JV</given-names> </name><etal/></person-group><article-title>Strategies for managing imaging utilization</article-title><source>J Am Coll Radiol</source><year>2009</year><month>12</month><volume>6</volume><issue>12</issue><fpage>844</fpage><lpage>850</lpage><pub-id pub-id-type="doi">10.1016/j.jacr.2009.08.003</pub-id><pub-id pub-id-type="medline">19945039</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>G Pitman</surname><given-names>A</given-names> </name></person-group><article-title>Quality of referral: what information should be included in a request for diagnostic imaging when a patient is referred to a clinical radiologist?</article-title><source>J 
Med Imaging Radiat Oncol</source><year>2017</year><month>06</month><volume>61</volume><issue>3</issue><fpage>299</fpage><lpage>303</lpage><pub-id pub-id-type="doi">10.1111/1754-9485.12577</pub-id><pub-id pub-id-type="medline">28139044</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barakzai</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Sheer</surname><given-names>ZZ</given-names> </name><name name-style="western"><surname>Muhammad</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Evaluation of radiology request forms in a tertiary care hospital: an audit with a focus on the impact of technological intervention</article-title><source>Cureus</source><year>2021</year><month>02</month><day>14</day><volume>13</volume><issue>2</issue><fpage>e13335</fpage><pub-id pub-id-type="doi">10.7759/cureus.13335</pub-id><pub-id pub-id-type="medline">33747644</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Castillo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Steffens</surname><given-names>T</given-names> </name><name name-style="western"><surname>Sim</surname><given-names>L</given-names> </name><name name-style="western"><surname>Caffery</surname><given-names>L</given-names> </name></person-group><article-title>The effect of clinical information on radiology reporting: a systematic review</article-title><source>J Med Radiat Sci</source><year>2021</year><month>03</month><volume>68</volume><issue>1</issue><fpage>60</fpage><lpage>74</lpage><pub-id pub-id-type="doi">10.1002/jmrs.424</pub-id><pub-id pub-id-type="medline">32870580</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ip</surname><given-names>IK</given-names> </name><name name-style="western"><surname>Schneider</surname><given-names>L</given-names> </name><name name-style="western"><surname>Seltzer</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Impact of provider-led, technology-enabled radiology management program on imaging use</article-title><source>Am J Med</source><year>2013</year><month>08</month><volume>126</volume><issue>8</issue><fpage>687</fpage><lpage>692</lpage><pub-id pub-id-type="doi">10.1016/j.amjmed.2012.11.034</pub-id><pub-id pub-id-type="medline">23786668</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trivedi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Mesterhazy</surname><given-names>J</given-names> </name><name name-style="western"><surname>Laguna</surname><given-names>B</given-names> </name><name name-style="western"><surname>Vu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Sohn</surname><given-names>JH</given-names> </name></person-group><article-title>Automatic determination of the need for intravenous contrast in musculoskeletal MRI examinations using IBM Watson&#x2019;s natural language processing algorithm</article-title><source>J Digit Imaging</source><year>2018</year><month>04</month><volume>31</volume><issue>2</issue><fpage>245</fpage><lpage>251</lpage><pub-id pub-id-type="doi">10.1007/s10278-017-0021-3</pub-id><pub-id pub-id-type="medline">28924815</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bhayana</surname><given-names>R</given-names> </name></person-group><article-title>Chatbots and large 
language models in radiology: a practical primer for clinical and research applications</article-title><source>Radiology</source><year>2024</year><month>01</month><volume>310</volume><issue>1</issue><fpage>e232756</fpage><pub-id pub-id-type="doi">10.1148/radiol.232756</pub-id><pub-id pub-id-type="medline">38226883</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gertz</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Bunck</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Lennartz</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 for automated determination of radiological study and protocol based on radiology request forms: a feasibility study</article-title><source>Radiology</source><year>2023</year><month>06</month><volume>307</volume><issue>5</issue><fpage>e230877</fpage><pub-id pub-id-type="doi">10.1148/radiol.230877</pub-id><pub-id pub-id-type="medline">37310247</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barash</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Klang</surname><given-names>E</given-names> </name><name name-style="western"><surname>Konen</surname><given-names>E</given-names> </name><name name-style="western"><surname>Sorin</surname><given-names>V</given-names> </name></person-group><article-title>ChatGPT-4 assistance in optimizing emergency department radiology referrals and imaging selection</article-title><source>J Am Coll Radiol</source><year>2023</year><month>10</month><volume>20</volume><issue>10</issue><fpage>998</fpage><lpage>1003</lpage><pub-id pub-id-type="doi">10.1016/j.jacr.2023.06.009</pub-id><pub-id 
pub-id-type="medline">37423350</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>CK</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>SS</given-names> </name></person-group><article-title>Large language models: a guide for radiologists</article-title><source>Korean J Radiol</source><year>2024</year><month>02</month><volume>25</volume><issue>2</issue><fpage>126</fpage><lpage>133</lpage><pub-id pub-id-type="doi">10.3348/kjr.2023.0997</pub-id><pub-id pub-id-type="medline">38288895</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Ong</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kennedy</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Evaluating GPT4 on impressions generation in radiology reports</article-title><source>Radiology</source><year>2023</year><month>06</month><volume>307</volume><issue>5</issue><fpage>e231259</fpage><pub-id pub-id-type="doi">10.1148/radiol.231259</pub-id><pub-id pub-id-type="medline">37367439</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mese</surname><given-names>I</given-names> </name><name name-style="western"><surname>Taslicay</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Sivrioglu</surname><given-names>AK</given-names> </name></person-group><article-title>Improving radiology workflow using ChatGPT and artificial 
intelligence</article-title><source>Clin Imaging</source><year>2023</year><month>11</month><volume>103</volume><fpage>109993</fpage><pub-id pub-id-type="doi">10.1016/j.clinimag.2023.109993</pub-id><pub-id pub-id-type="medline">37812965</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mukherjee</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hou</surname><given-names>B</given-names> </name><name name-style="western"><surname>Lanfredi</surname><given-names>RB</given-names> </name><name name-style="western"><surname>Summers</surname><given-names>RM</given-names> </name></person-group><article-title>Feasibility of using the privacy-preserving large language model Vicuna for labeling radiology reports</article-title><source>Radiology</source><year>2023</year><month>10</month><volume>309</volume><issue>1</issue><fpage>e231147</fpage><pub-id pub-id-type="doi">10.1148/radiol.231147</pub-id><pub-id pub-id-type="medline">37815442</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rau</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rau</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zoeller</surname><given-names>D</given-names> </name><etal/></person-group><article-title>A context-based chatbot surpasses trained radiologists and generic ChatGPT in following the ACR appropriateness guidelines</article-title><source>Radiology</source><year>2023</year><month>07</month><volume>308</volume><issue>1</issue><fpage>e230970</fpage><pub-id pub-id-type="doi">10.1148/radiol.230970</pub-id><pub-id pub-id-type="medline">37489981</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Cai</surname><given-names>W</given-names> </name></person-group><article-title>Feasibility and prospect of privacy-preserving large language models in radiology</article-title><source>Radiology</source><year>2023</year><month>10</month><volume>309</volume><issue>1</issue><fpage>e232335</fpage><pub-id pub-id-type="doi">10.1148/radiol.232335</pub-id><pub-id pub-id-type="medline">37815443</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hallinan</surname><given-names>JTPD</given-names> </name><name name-style="western"><surname>Leow</surname><given-names>NW</given-names> </name><name name-style="western"><surname>Ong</surname><given-names>W</given-names> </name><etal/></person-group><article-title>MRI spine request form enhancement and auto protocoling using a secure institutional large language model</article-title><source>Spine J</source><year>2025</year><month>03</month><volume>25</volume><issue>3</issue><fpage>505</fpage><lpage>514</lpage><pub-id pub-id-type="doi">10.1016/j.spinee.2024.10.021</pub-id><pub-id pub-id-type="medline">39536908</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hallinan</surname><given-names>JTPD</given-names> </name><name name-style="western"><surname>Leow</surname><given-names>NW</given-names> </name><name name-style="western"><surname>Low</surname><given-names>YX</given-names> </name><etal/></person-group><article-title>An institutional large language model for musculoskeletal MRI improves protocol adherence and accuracy</article-title><source>J Bone Joint Surg Am</source><year>2025</year><month>07</month><day>8</day><volume>107</volume><issue>16</issue><fpage>1833</fpage><lpage>1840</lpage><pub-id 
pub-id-type="doi">10.2106/JBJS.24.01429</pub-id><pub-id pub-id-type="medline">40627696</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bhayana</surname><given-names>R</given-names> </name><name name-style="western"><surname>Alwahbi</surname><given-names>O</given-names> </name><name name-style="western"><surname>Ladak</surname><given-names>AM</given-names> </name><etal/></person-group><article-title>Leveraging large language models to generate clinical histories for oncologic imaging requisitions</article-title><source>Radiology</source><year>2025</year><month>02</month><volume>314</volume><issue>2</issue><fpage>e242134</fpage><pub-id pub-id-type="doi">10.1148/radiol.242134</pub-id><pub-id pub-id-type="medline">39903072</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Terzis</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kaya</surname><given-names>K</given-names> </name><name name-style="western"><surname>Sch&#x00F6;mig</surname><given-names>T</given-names> </name><etal/></person-group><article-title>GPT-4 for automated sequence-level determination of MRI protocols based on radiology request forms from clinical routine</article-title><source>Eur Radiol</source><year>2026</year><month>02</month><volume>36</volume><issue>2</issue><fpage>1541</fpage><lpage>1552</lpage><pub-id pub-id-type="doi">10.1007/s00330-025-11888-4</pub-id><pub-id pub-id-type="medline">40779162</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tavakoli</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>D</given-names> 
</name></person-group><article-title>AI-generated clinical histories for radiology reports: closing the information gap</article-title><source>Radiology</source><year>2025</year><month>02</month><volume>314</volume><issue>2</issue><fpage>e243910</fpage><pub-id pub-id-type="doi">10.1148/radiol.243910</pub-id><pub-id pub-id-type="medline">39903081</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abedi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Tofighi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Salehi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Latterman</surname><given-names>PT</given-names> </name><name name-style="western"><surname>Basques</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Gholamrezanezhad</surname><given-names>A</given-names> </name></person-group><article-title>Reason for Exam Imaging Reporting and Data System (RI-RADS): a grading system to standardize radiology requisitions</article-title><source>Eur J Radiol</source><year>2019</year><month>11</month><volume>120</volume><fpage>108661</fpage><pub-id pub-id-type="doi">10.1016/j.ejrad.2019.108661</pub-id><pub-id pub-id-type="medline">31610322</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Asgari</surname><given-names>E</given-names> </name><name name-style="western"><surname>Monta&#x00F1;a-Brown</surname><given-names>N</given-names> </name><name name-style="western"><surname>Dubois</surname><given-names>M</given-names> </name><etal/></person-group><article-title>A framework to assess clinical safety and hallucination rates of LLMs for medical text summarisation</article-title><source>NPJ Digit 
Med</source><year>2025</year><month>05</month><day>13</day><volume>8</volume><issue>1</issue><fpage>274</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01670-7</pub-id><pub-id pub-id-type="medline">40360677</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gwet</surname><given-names>KL</given-names> </name></person-group><article-title>Computing inter-rater reliability and its variance in the presence of high agreement</article-title><source>Br J Math Stat Psychol</source><year>2008</year><month>05</month><volume>61</volume><issue>Pt 1</issue><fpage>29</fpage><lpage>48</lpage><pub-id pub-id-type="doi">10.1348/000711006X126600</pub-id><pub-id pub-id-type="medline">18482474</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x00C7;amur</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cesur</surname><given-names>T</given-names> </name><name name-style="western"><surname>G&#x00FC;ne&#x015F;</surname><given-names>YC</given-names> </name><etal/></person-group><article-title>Evaluating large language models for imaging modality selection: potential to reduce unnecessary contrast agent use and radiation exposure</article-title><source>Clin Imaging</source><year>2025</year><month>09</month><volume>125</volume><fpage>110573</fpage><pub-id pub-id-type="doi">10.1016/j.clinimag.2025.110573</pub-id><pub-id pub-id-type="medline">40752449</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kanemaru</surname><given-names>N</given-names> </name><name name-style="western"><surname>Yasaka</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Okimoto</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Efficacy of fine-tuned large language model in CT protocol assignment as clinical decision-supporting system</article-title><source>J Imaging Inform Med</source><year>2025</year><month>12</month><volume>38</volume><issue>6</issue><fpage>4336</fpage><lpage>4348</lpage><pub-id pub-id-type="doi">10.1007/s10278-025-01433-6</pub-id><pub-id pub-id-type="medline">39909993</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>DYZ</given-names> </name><name name-style="western"><surname>Le</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>ChatGPT performance in assessing musculoskeletal MRI scan appropriateness based on ACR appropriateness criteria</article-title><source>Sci Rep</source><year>2025</year><month>02</month><day>28</day><volume>15</volume><issue>1</issue><fpage>7140</fpage><pub-id pub-id-type="doi">10.1038/s41598-025-88925-1</pub-id><pub-id pub-id-type="medline">40021679</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Triana</surname><given-names>BP</given-names> </name><name name-style="western"><surname>Wiggins</surname><given-names>WF</given-names> </name><name name-style="western"><surname>Befera</surname><given-names>N</given-names> </name><name name-style="western"><surname>Roth</surname><given-names>C</given-names> </name><name name-style="western"><surname>Cline</surname><given-names>B</given-names> </name></person-group><article-title>Proof-of-concept prompted large language model for radiology procedure request routing</article-title><source>J 
Vasc Interv Radiol</source><year>2025</year><month>07</month><volume>36</volume><issue>7</issue><fpage>1201</fpage><lpage>1207</lpage><pub-id pub-id-type="doi">10.1016/j.jvir.2025.03.012</pub-id><pub-id pub-id-type="medline">40139433</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Akinci D&#x2019;Antonoli</surname><given-names>T</given-names> </name><name name-style="western"><surname>Stanzione</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bluethgen</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Large language models in radiology: fundamentals, applications, ethical considerations, risks, and future directions</article-title><source>Diagn Interv Radiol</source><year>2024</year><month>03</month><day>6</day><volume>30</volume><issue>2</issue><fpage>80</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.4274/dir.2023.232417</pub-id><pub-id pub-id-type="medline">37789676</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kao</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Kao</surname><given-names>HT</given-names> </name></person-group><article-title>Large language models in radiology: a technical and clinical perspective</article-title><source>Eur J Radiol Artif Intell</source><year>2025</year><month>06</month><volume>2</volume><fpage>100021</fpage><pub-id pub-id-type="doi">10.1016/j.ejrai.2025.100021</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Omar</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sorin</surname><given-names>V</given-names> </name><name 
name-style="western"><surname>Collins</surname><given-names>JD</given-names> </name><etal/></person-group><article-title>Multi-model assurance analysis showing large language models are highly vulnerable to adversarial hallucination attacks during clinical decision support</article-title><source>Commun Med (Lond)</source><year>2025</year><month>08</month><day>2</day><volume>5</volume><issue>1</issue><fpage>330</fpage><pub-id pub-id-type="doi">10.1038/s43856-025-01021-3</pub-id><pub-id pub-id-type="medline">40753316</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Large language model prompts.</p><media xlink:href="jmir_v28i1e82579_app1.docx" xlink:title="DOCX File, 42 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Reason for Exam Imaging Reporting and Data System (RI-RADS).</p><media xlink:href="jmir_v28i1e82579_app2.docx" xlink:title="DOCX File, 22 KB"/></supplementary-material></app-group></back></article>