<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v27i1e68998</article-id>
      <article-id pub-id-type="pmid">40371947</article-id>
      <article-id pub-id-type="doi">10.2196/68998</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Review</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Review</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Scientific Evidence for Clinical Text Summarization Using Large Language Models: Scoping Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>de Azevedo Cardoso</surname>
            <given-names>Taiane</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chrimes</surname>
            <given-names>Dillon</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lin</surname>
            <given-names>Rebecca</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sivarajkumar</surname>
            <given-names>Sonish</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yu</surname>
            <given-names>Ping</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lee</surname>
            <given-names>Chanseo</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Bednarczyk</surname>
            <given-names>Lydie</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Division of Medical Information Sciences</institution>
            <institution>University Hospital of Geneva</institution>
            <addr-line>Rue Gabrielle-Perret-Gentil 4</addr-line>
            <addr-line>Geneva, 1205</addr-line>
            <country>Switzerland</country>
            <phone>41 797686970</phone>
            <email>lydie.bednarczyk@hug.ch</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-2602-2927</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Reichenpfader</surname>
            <given-names>Daniel</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8052-3359</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Gaudet-Blavignac</surname>
            <given-names>Christophe</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6527-5898</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Ette</surname>
            <given-names>Amon Kenna</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7637-0057</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Zaghir</surname>
            <given-names>Jamil</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8209-6098</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>Yuanyuan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-7608-2571</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Bensahla</surname>
            <given-names>Adel</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-8144-2331</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Bjelogrlic</surname>
            <given-names>Mina</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6922-3283</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
          <degrees>MPH, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2681-8076</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Division of Medical Information Sciences</institution>
        <institution>University Hospital of Geneva</institution>
        <addr-line>Geneva</addr-line>
        <country>Switzerland</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Institute for Patient-centered Digital Health</institution>
        <institution>Bern University of Applied Sciences</institution>
        <addr-line>Biel</addr-line>
        <country>Switzerland</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Faculty of Medicine</institution>
        <institution>University of Geneva</institution>
        <addr-line>Geneva</addr-line>
        <country>Switzerland</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Lydie Bednarczyk <email>lydie.bednarczyk@hug.ch</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>5</month>
        <year>2025</year>
      </pub-date>
      <volume>27</volume>
      <elocation-id>e68998</elocation-id>
      <history>
        <date date-type="received">
          <day>20</day>
          <month>11</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>1</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>21</day>
          <month>2</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>12</day>
          <month>3</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Lydie Bednarczyk, Daniel Reichenpfader, Christophe Gaudet-Blavignac, Amon Kenna Ette, Jamil Zaghir, Yuanyuan Zheng, Adel Bensahla, Mina Bjelogrlic, Christian Lovis. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 15.05.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2025/1/e68998" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Information overload in electronic health records requires effective solutions to alleviate clinicians’ administrative tasks. Automatically summarizing clinical text has gained significant attention with the rise of large language models. While individual studies show optimism, a structured overview of the research landscape is lacking.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to present the current state of the art on clinical text summarization using large language models, evaluate the level of evidence in existing research, and assess the applicability of performance findings in clinical settings.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This scoping review complied with the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) guidelines. Literature published between January 1, 2019, and June 18, 2024, was identified from 5 databases: PubMed, Embase, Web of Science, IEEE Xplore, and ACM Digital Library. Studies were excluded if they did not describe transformer-based models, did not focus on clinical text summarization, did not engage with free-text data, were not original research, were nonretrievable, were not peer-reviewed, or were not in English, French, Spanish, or German. Data related to study context and characteristics, scope of research, and evaluation methodologies were systematically collected and analyzed by 3 authors independently.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 30 original studies were included in the analysis. All used observational retrospective designs, mainly using real patient data (n=28, 93%). The research landscape demonstrated a narrow research focus, often centered on summarizing radiology reports (n=17, 57%), primarily involving data from the intensive care unit (n=15, 50%) of US-based institutions (n=19, 73%), in English (n=26, 87%). This focus aligned with the frequent reliance on the open-source Medical Information Mart for Intensive Care dataset (n=15, 50%). Summarization methodologies predominantly involved abstractive approaches (n=17, 57%) on single-document inputs (n=4, 13%) with unstructured data (n=13, 43%), yet reporting on methodological details remained inconsistent across studies. Model selection involved both open-source models (n=26, 87%) and proprietary models (n=7, 23%). Evaluation frameworks were highly heterogeneous. All studies conducted internal validation, but external validation (n=2, 7%), failure analysis (n=6, 20%), and patient safety risks analysis (n=1, 3%) were infrequent, and none reported bias assessment. Most studies used both automated metrics and human evaluation (n=16, 53%), while 10 (33%) used only automated metrics, and 4 (13%) only human evaluation.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Key barriers hinder the translation of current research into trustworthy, clinically valid applications. Current research remains exploratory and limited in scope, with many applications yet to be explored. Performance assessments often lack reliability, and clinical impact evaluations are insufficient, raising concerns about model utility, safety, fairness, and data privacy. Advancing the field requires more robust evaluation frameworks, a broader research scope, and a stronger focus on real-world applicability.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>summarization</kwd>
        <kwd>large language models</kwd>
        <kwd>natural language processing</kwd>
        <kwd>health care</kwd>
        <kwd>electronic health records</kwd>
        <kwd>scoping review</kwd>
        <kwd>translational research</kwd>
        <kwd>artificial intelligence</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>In February 2024, Van Veen et al [<xref ref-type="bibr" rid="ref1">1</xref>] reported that large language models (LLMs) could outperform medical experts in clinical text summarization. Their work investigated the effectiveness of specifically tailored models to accurately summarize clinical documents. However, a careful analysis of the experimental design and the evaluation methodology questions this statement.</p>
      <p>Clinical text summarization is described by Keszthelyi et al [<xref ref-type="bibr" rid="ref2">2</xref>] as the art of collecting, synthesizing, and communicating patient information. An effective summary must be tailored to meet the needs of its intended audience, which requires a clear definition of the clinical text summary’s purpose to ensure relevance and meaning.</p>
      <p>In the fast-paced environment of modern health care, coupled with information overload in electronic health records (EHRs), physicians face added cognitive load and time pressure. Misunderstandings, incomplete information sharing, or delays in conveying critical patient details ultimately affect the quality of care and decision-making [<xref ref-type="bibr" rid="ref3">3</xref>]. Thus, reducing the administrative burden on clinicians has become a critical need.</p>
      <p>LLMs can process significant volumes of input data and produce coherent output text [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. As such, they present an opportunity to alleviate clinicians’ administrative workload by summarizing patient information contained in EHRs. While ensuring strict adherence to data privacy standards, effective models could deliver context-specific summaries that meet clinical objectives. Potential applications include optimizing information retrieval, as critical data are often buried within extensive, noisy, and repetitive entries [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]; or automating summarization tasks that are traditionally carried out manually, such as discharge summaries [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>] (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Additionally, patient-directed simplified reports could support informed decision-making [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>The process of generating clinical summaries using LLMs. Source documents from EHRs, such as radiology reports, progress notes, nursing notes, laboratory test results, and medical reports, are processed by an LLM to generate a summary intended, in this case, for a physician. EHR: electronic health record; LLM: large language model. Created in BioRender (Bednarczyk, L., 2025, https://BioRender.com/trqp263; [<xref ref-type="bibr" rid="ref12">12</xref>]).</p>
        </caption>
        <graphic xlink:href="jmir_v27i1e68998_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>Several researchers have examined the application of LLMs in health care [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Bedi et al [<xref ref-type="bibr" rid="ref15">15</xref>], Wang et al [<xref ref-type="bibr" rid="ref17">17</xref>], Park et al [<xref ref-type="bibr" rid="ref14">14</xref>], and Pressman et al [<xref ref-type="bibr" rid="ref16">16</xref>] all reported concerns regarding the strategies used to evaluate these models. Meng et al [<xref ref-type="bibr" rid="ref13">13</xref>] highlighted the lack of robust clinical studies to validate LLMs’ effectiveness and safety in real-world settings.</p>
      <p>To the best of our knowledge, no comprehensive review has specifically addressed the performance of LLMs in clinical text summarization. This gap raises critical questions: Do LLMs genuinely outperform medical experts in summarizing clinical texts? Can they be used with confidence in clinical settings for summarization tasks?</p>
      <p>This review seeks to assess the reliability of performance findings and their applicability to health care settings through a detailed analysis of the literature, including insights from studies such as those discussed by Van Veen et al [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
      <p>The specific research objectives are as follows.</p>
      <list list-type="bullet">
        <list-item>
          <p>Present the current state of research on clinical text summarization using LLMs.</p>
        </list-item>
        <list-item>
          <p>Evaluate the level of evidence in the current state of research.</p>
        </list-item>
        <list-item>
          <p>Assess whether these models can be used with confidence in clinical settings.</p>
        </list-item>
        <list-item>
          <p>Provide expert recommendations for current and future research.</p>
        </list-item>
      </list>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>This scoping review focused on the summarization of clinical text within EHRs using LLMs. The authors adhered to the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) checklist (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Search Strategy</title>
        <p>Relevant literature, published between January 1, 2019, and June 18, 2024, was identified from 5 databases: PubMed, Embase, Web of Science, IEEE Xplore, and ACM Digital Library. The search strategy was structured around three key dimensions: “summarization,” “large language models,” and “healthcare,” which were derived from the above-mentioned research objectives and combined with Boolean operators. Related search terms and exact database queries are presented in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Eligibility Criteria</title>
        <p>Eligibility criteria (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>), defined prior to the screening process and agreed upon by 2 authors (DR and LB), were framed as exclusion criteria to ensure a comprehensive identification of all relevant papers. Studies that did not describe a model based on the original transformer architecture as introduced by Vaswani et al [<xref ref-type="bibr" rid="ref18">18</xref>], did not focus on clinical text summarization, or did not engage with free-text data were excluded. We also excluded publications that were not original research, including editorials, reviews, or comments, as well as those that were not retrievable. Only peer-reviewed literature in English, French, Spanish, or German was considered. January 1, 2019, was chosen as the cutoff date based on existing literature [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        <boxed-text id="box1" position="float">
          <title>Eligibility criteria.</title>
          <p>Exclusion criteria</p>
          <list list-type="bullet">
            <list-item>
              <p>The source of evidence (SOE) does not describe a model based on the original transformer architecture.</p>
            </list-item>
            <list-item>
              <p>The SOE does not describe clinical text summarization or summarizes medical texts that are not clinical (eg, biomedical texts or medical evidence summarization).</p>
            </list-item>
            <list-item>
              <p>The SOE does not deal with free-text data.</p>
            </list-item>
            <list-item>
              <p>The SOE is published before January 1, 2019.</p>
            </list-item>
            <list-item>
              <p>The SOE does not describe original research.</p>
            </list-item>
            <list-item>
              <p>The SOE is not published in English, French, Spanish, or German.</p>
            </list-item>
            <list-item>
              <p>The SOE is an editorial, review, or comment.</p>
            </list-item>
            <list-item>
              <p>The SOE is not retrievable.</p>
            </list-item>
            <list-item>
              <p>The SOE is not peer-reviewed.</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Screening Process</title>
        <p>During the initial quality check, a random sample of 10 studies, including titles and abstracts, was screened by 2 authors (LB and DR) to refine exclusion criteria and ensure consistency in screening. Discrepancies were resolved collaboratively.</p>
        <p>The screening process was then conducted in 2 stages by the same 2 authors (LB and DR). In the first stage, titles and abstracts of the remaining studies were independently screened, with any conflicting decisions defaulting to eligibility to prevent premature exclusion of potentially relevant studies. In the second stage, full-text screening was performed independently by both authors, with any disagreements resolved through discussion and consensus.</p>
      </sec>
      <sec>
        <title>Data Synthesis</title>
        <p>Before data extraction, a second quality improvement phase was conducted. Two authors (LB and DR) independently extracted data from a random sample of 3 studies to refine and finalize the data extraction table. Subsequently, full-text screening and data extraction were independently carried out by three authors (LB, CGB, and AKE) using a predefined spreadsheet.</p>
        <p>Data extraction was organized into three main aspects: (1) study context and characteristics, (2) scope of research, and (3) evaluation methodologies.</p>
        <list list-type="bullet">
          <list-item>
            <p>Study context and characteristics included the year and location of publication (based on the corresponding author’s address), the type of journal, the study design (inferred based on the described methodology, distinguishing between retrospective and prospective approaches), and the type of dataset used, categorized as real patient data (open-source or proprietary) or synthetic data.</p>
          </list-item>
          <list-item>
            <p>The scope of research encompassed information related to the field of application, the summary intention, summarizing techniques used, technological aspects, and ethical considerations. The field of application refers to the domain where the summarization methods were developed and evaluated, covering department, country, patient demographics, and language coverage. Department and country details were extracted directly from the dataset information. Summary intention refers to the purpose of the summary, defined in this work based on the target audience, the summarization objective, and the source document. The summarization technique covered the details of input documents and the summarization techniques used. Technological aspects included relevant modeling characteristics (pretraining, fine-tuning strategies, and prompt engineering approaches), deployment environments (eg, on-premises, cloud-based), hardware requirements, and associated computational costs. Ethical considerations included dataset deidentification and the reporting of institutional review board (IRB) approval.</p>
          </list-item>
          <list-item>
            <p>Evaluation methodologies included the strategies used, sample sizes, metrics, and additional details on each evaluation framework used.</p>
          </list-item>
        </list>
        <p>Any discrepancies in data extraction were resolved through discussion with a fourth author (DR) to ensure accuracy and consistency throughout the process. The completed data extraction is provided in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref49">49</xref>].</p>
        <p>No assumption was made about missing or unclear details unless explicitly stated. The authors of the included studies were not contacted for clarification. The extraction focused exclusively on information pertinent to the summarization of clinical text found in the EHR.</p>
        <p>Extracted data were synthesized using a descriptive approach, complemented by narrative synthesis, and presented in tables and figures where applicable. Data were summarized and described according to the 3 categories of data extraction as described earlier. The synthesis aimed to clearly outline key trends and characteristics across the included studies without conducting statistical analyses or quantitative meta-analyses.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Study Contexts and Characteristics</title>
        <p>This scoping review included a total of 30 studies. Of the 281 retrieved by database queries, 25 were deemed eligible following title, abstract, and full-text screening. Additionally, 5 studies were incorporated through manual reference screening using the snowballing technique [<xref ref-type="bibr" rid="ref20">20</xref>]. An overview of the literature retrieval and screening process is presented in the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flowchart (<xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flow chart.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e68998_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Publications were distributed across interdisciplinary (n=15, 50%), engineering (n=10, 33%), and clinical (n=5, 17%) journals. The annual publication count showed an exponential growth trend, with no study published prior to 2020 (<xref rid="figure3" ref-type="fig">Figure 3</xref>). Clinical document summarization was the primary research objective in the majority of studies (n=29, 97%), while Li et al [<xref ref-type="bibr" rid="ref21">21</xref>] used summarization as a preprocessing step in sepsis prediction.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Annual publication count categorized by journal type.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e68998_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The study location was dominated by the United States (n=10, 33%), followed by China (n=5, 17%), India (n=3, 10%) [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>], and the United Kingdom (n=2, 7%) [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Other locations included Australia [<xref ref-type="bibr" rid="ref27">27</xref>], France [<xref ref-type="bibr" rid="ref28">28</xref>], Germany [<xref ref-type="bibr" rid="ref29">29</xref>], Lebanon [<xref ref-type="bibr" rid="ref30">30</xref>], Spain [<xref ref-type="bibr" rid="ref31">31</xref>], and Taiwan [<xref ref-type="bibr" rid="ref32">32</xref>] (n=1, 3% each), and Jiang et al [<xref ref-type="bibr" rid="ref33">33</xref>] reported multiple corresponding authors from different countries. For 10% of studies (n=3), the corresponding author’s address could not be determined [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>].</p>
        <p>Among the studies, all used an observational retrospective design, using existing patient data to evaluate model performance. Most studies (n=28, 93%) used real patient data, primarily from open-source datasets (n=18, 60%), with the Medical Information Mart for Intensive Care (MIMIC) series (n=15, 50%) and the Indiana University X-Ray database (n=8, 27%) being the most reported (<xref ref-type="table" rid="table1">Table 1</xref>). Additionally, 47% of studies used proprietary databases (n=14). Goswami et al [<xref ref-type="bibr" rid="ref22">22</xref>] mentioned using real patient data but did not specify the dataset used. Caterson et al [<xref ref-type="bibr" rid="ref25">25</xref>] and Wu et al [<xref ref-type="bibr" rid="ref28">28</xref>] used synthetic data, relying on scenarios generated by the authors.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Distribution of specific open-source datasets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <col width="0"/>
            <thead>
              <tr valign="top">
                <td>Open-source dataset</td>
                <td>Publications, n (%)</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>MIMIC<sup>a</sup></td>
                <td>15 (50)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>IU X-Ray<sup>b</sup></td>
                <td>8 (27)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>KCH<sup>c</sup></td>
                <td>1 (3)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>SAM<sup>d</sup></td>
                <td>1 (3)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>Stanford Coll<sup>e</sup></td>
                <td>1 (3)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>PubMed</td>
                <td colspan="2">1 (3)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>MIMIC: Medical Information Mart for Intensive Care.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>IU X-Ray: Indiana University X-Ray database.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>KCH: King’s College Hospital database.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>SAM: SAMSum corpus.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>Stanford Coll: Stanford Hospital Collection.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Scope of Research</title>
        <sec>
          <title>Field of Application</title>
          <p>Clinical departments in which the models were evaluated could be retrieved in 22 studies (73%) and included mainly the intensive care unit (ICU; n=15, 50%), neurology (n=2, 7%) [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], and oncology (n=2, 7%) [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. Other departments included cardiology [<xref ref-type="bibr" rid="ref38">38</xref>], geriatrics [<xref ref-type="bibr" rid="ref27">27</xref>], neurosurgery [<xref ref-type="bibr" rid="ref39">39</xref>], and orthopedics [<xref ref-type="bibr" rid="ref25">25</xref>] (n=1, 3% each). Chen et al [<xref ref-type="bibr" rid="ref32">32</xref>] and Vinod et al [<xref ref-type="bibr" rid="ref24">24</xref>] both addressed multiple areas.</p>
          <p>Dataset geographical origin was identified in 28 studies (93%). It was predominantly the United States (n=19, 63%) and China (n=3, 10%) [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. Other origins included Germany [<xref ref-type="bibr" rid="ref29">29</xref>], the United Kingdom [<xref ref-type="bibr" rid="ref26">26</xref>], Taiwan [<xref ref-type="bibr" rid="ref32">32</xref>], Spain [<xref ref-type="bibr" rid="ref31">31</xref>], and Australia [<xref ref-type="bibr" rid="ref27">27</xref>] (n=1, 3% each). Additionally, Searle et al [<xref ref-type="bibr" rid="ref26">26</xref>] included patients from 2 different countries. Four studies (13%) reported on patient demographics: 3 (10%) provided information on sex, race, and ethnicity [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], and López et al [<xref ref-type="bibr" rid="ref31">31</xref>] on the sex-age ratio of the study population.</p>
          <p>The language coverage was mainly English (n=26, 87%), including 3 (n=3, 10%) studies explicitly stating it and 23 (n=23, 77%) inferred based on dataset sources. Additionally, 3 studies (10%) addressed the summarization of clinical documents in Chinese [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>], and 1 (3%) in German [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        </sec>
        <sec>
          <title>Summary Intention</title>
          <p>The intended audience of the generated summary was specified in 16 (53%) studies. Most studies targeted health care professionals (n=12, 40%), while a smaller proportion focused on patients (n=2, 7%) [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. In addition, 2 studies (n=2, 7%) aimed to serve both patients and health care professionals [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Li et al [<xref ref-type="bibr" rid="ref21">21</xref>] used summarization as a preprocessing step for subsequent modeling tasks to enhance sepsis prediction, rendering the identification of a target audience irrelevant.</p>
          <p>Most studies (n=27, 90%) focused on a single summarization task while only a few, such as Alkhalaf et al [<xref ref-type="bibr" rid="ref27">27</xref>], Van Veen et al [<xref ref-type="bibr" rid="ref1">1</xref>], and Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>], explored multiple summarization tasks. The details of summarization objectives and input sources used in each study are provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref49">49</xref>].</p>
          <p>Summarization objectives included mainly generating the impression section of radiology reports (n=12, 40%), followed by generating the hospital course section of discharge summaries (n=3, 10%). Notably, the description of the summarization objectives varied across studies. Some specified the exact section of a document to be generated (eg, the impression section of radiology reports), while others described the types of source documents and the key information to be extracted [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
          <p>Regarding input sources, most studies (n=19, 73%) used a single type of text corpus, with radiology reports being the most common (n=17, 57%), followed by progress notes (n=2, 7%) and patient forms (n=1, 3%). Overall, 23% (n=7) of studies used multiple-type text corpora. Furthermore, 13% (n=4) did not explicitly specify their source [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        </sec>
        <sec>
          <title>Summarization Methodology</title>
          <p>Fifteen (50%) studies explicitly mentioned the input source structure: 43% (n=13) used unstructured data only, while 7% (n=2) also reported using structured data such as patient demographics [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. See <xref rid="figure4" ref-type="fig">Figure 4</xref> for an overview of the reported information on experimental design. Six (20%) studies explicitly reported on the number of documents used as input at once: 4 (13%) mentioned single-document summarization, Searle et al [<xref ref-type="bibr" rid="ref26">26</xref>] specified multidocument summarization, where the model took a cluster of related documents as input, and Chien et al [<xref ref-type="bibr" rid="ref39">39</xref>] referred to a single-multiple document approach, where multiple documents were combined and treated as a single input. 20 publications (67%) specified the summarization technique used: 17 (57%) used abstractive methods, 7 (23%) extractive methods, and 2 (7%) hybrid methods [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Among these, 5 (17%) evaluated at least 2 approaches. While abstractive summarization generates new sentences that paraphrase the core ideas of the source text, extractive summarization selects and compiles existing key sentences or phrases directly from the original content [<xref ref-type="bibr" rid="ref50">50</xref>].</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Overview of the reported information, including the medical field, dataset demographics, geographical origin of the test set, language coverage of the study, the intended audience of the summary (eg, physician, patient), a document used as input, summarization objective, input format (structured, unstructured, and both), input document count (single, multiple, and single-multiple), summarization approach (abstractive, extractive, and hybrid), and deployment environment (on-premises and cloud-based) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref49">49</xref>]. NA: not available.</p>
            </caption>
            <graphic xlink:href="jmir_v27i1e68998_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Twenty-six (87%) studies reported on open-source models, with bidirectional encoder representations from transformers–based models being the most frequently cited (n=12, 40%). Seven (23%) studies investigated proprietary models, all based on commercial services provided by OpenAI. Two (7%) examined both model types [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], while Li et al [<xref ref-type="bibr" rid="ref21">21</xref>] used an ensemble model. Seven (23%) studies performed additional pretraining of existing model architectures prior to fine-tuning. Furthermore, 23 (77%) mentioned fine-tuning. Ten (33%) studies conducted prompt engineering.</p>
          <p>Eight (27%) studies reported on the deployment environment: 3 (10%) indicated on-premises deployment [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], and 6 (20%) the use of external services, including Azure OpenAI application programming interface (API) [<xref ref-type="bibr" rid="ref1">1</xref>], and OpenAI API [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. Sixteen (53%) studies reported on hardware requirements, whereas none mentioned associated costs.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Deidentification was conducted in 13 (43%) studies, and anonymization in 2 (7%) studies. Additionally, Wang et al [<xref ref-type="bibr" rid="ref41">41</xref>] mentioned “data desensitization,” and López et al [<xref ref-type="bibr" rid="ref31">31</xref>] mentioned ensuring that no patient or doctor information was included in datasets. IRB approval was reported in 30% (n=9) of studies.</p>
      </sec>
      <sec>
        <title>Evaluation Methodology</title>
        <sec>
          <title>Evaluation Approaches</title>
          <p>External validation, which involves testing models on datasets not used during training, was not explicitly reported in most cases. However, 2 (7%) studies reported testing on unseen datasets during model development (training or fine-tuning) and relied solely on automated metrics for external evaluation [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Internal validation, which assesses a model’s performance using a dedicated sample of the dataset it was trained on, was conducted in all studies.</p>
          <p>Global performance evaluation, defined as the evaluation process conducted on the full test set, was conducted in all studies. Furthermore, 16 (53%) studies used both automatic validation metrics and human evaluations, while 10 (33%) used automatic metrics only, and 4 (13%) human evaluations only. Subgroup performance evaluation was reported in 2 (7%) papers. Out-of-distribution performance testing, a process that evaluates how well a model performs on data that are underrepresented in the training set [<xref ref-type="bibr" rid="ref51">51</xref>], was reported by Van Veen et al [<xref ref-type="bibr" rid="ref47">47</xref>]. Subcategory testing was reported by Liang et al [<xref ref-type="bibr" rid="ref29">29</xref>], who evaluated the impact of different stages of cancer on model performance using patient degree matching.</p>
          <p>Model failure analysis was conducted through error categorization in 6 (20%) studies. Four (13%) studies addressed both the classification and quantification of error and 2 (7%) focused exclusively on error classification [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. Bias analysis remained unexplored, as no studies explicitly reported structured bias assessments. Finally, patient safety risk analysis was assessed by Van Veen et al [<xref ref-type="bibr" rid="ref1">1</xref>] (n=1, 3%) using a Healthcare Adapted Risk Management Scale [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
          <p>A broader understanding of model performance was achieved through various approaches, including ablation studies (n=8, 27%) and attention distribution analysis (n=2, 7%) [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. Additionally, Zhao et al [<xref ref-type="bibr" rid="ref43">43</xref>] included analyses of sparsity, loss curves, and the Zipf distribution, while Li et al [<xref ref-type="bibr" rid="ref21">21</xref>] evaluated model performance through a proxy task involving sepsis prediction.</p>
        </sec>
        <sec>
          <title>Automated Performance Metrics</title>
          <p>Automated performance metrics were reported in 26 (87%) papers. Test set size, mentioned in 23 (77%) studies, included mainly between 15 and 1000 documents (0-1000 documents: 33%; 1000–5000 documents: 30%; more than 5000 documents: 10%). Randomized sampling was explicitly mentioned in 5 (17%) publications, while 3 used the entire dataset as a test set (10%) [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>].</p>
          <p>Reference-based metrics were widely used (n=25, 77%). Specifically, this type of metrics compared generated summaries to predefined reference summaries that encompassed the original document (n=18, 60%), manually labeled data (n=4, 13%), and expert-generated summaries (n=2, 7%) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Two (7%) studies did not mention the reference summary used [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Validation of reference summaries was conducted in 7 (23%) studies, including 1 (3%) using the original document as a reference summary [<xref ref-type="bibr" rid="ref43">43</xref>]. Recall-Oriented Understudy for Gisting Evaluation score was the most frequent performance measure (n=24, 80%), followed by Bilingual Evaluation Understudy (n=8, 27%) and bidirectional encoder representations from transformers score (n=7, 23%).</p>
          <p>The test set count and evaluation metrics for both automatic metrics and human evaluation in each publication are detailed in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref49">49</xref>].</p>
        </sec>
        <sec>
          <title>Human Evaluation</title>
          <p>Human assessment was reported in 20 (67%) papers. Test set size, mentioned in 18 (60%) studies, included mainly between 2 and 50 documents (2-50 documents: n=10, 33%; 50–100 documents: n=6, 20%; more than 100 documents: n=2, 7%). Randomized sampling was explicitly mentioned in 12 (40%) publications, including Li et al [<xref ref-type="bibr" rid="ref21">21</xref>] who specified the use of 5-fold cross-validation. Additionally, 3 (10%) studies used the entire dataset [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref41">41</xref>].</p>
          <p>Metrics, detailed in 18 (60%) studies, encompassed readability (n=12, 40%), factual correctness (n=12, 40%), and the adequacy of provided information directly related to the summary intention (n=12, 40%). These assessments include relevance [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], completeness [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref37">37</xref>] (n=2, 7% each), the ability to capture critical information [<xref ref-type="bibr" rid="ref47">47</xref>], adequacy [<xref ref-type="bibr" rid="ref44">44</xref>], informativeness [<xref ref-type="bibr" rid="ref34">34</xref>], omission or insertion [<xref ref-type="bibr" rid="ref25">25</xref>], comprehensiveness [<xref ref-type="bibr" rid="ref39">39</xref>], and effectiveness [<xref ref-type="bibr" rid="ref24">24</xref>] (n=1, 3% each). Additionally, clinical use was assessed in 3 (10%) studies and involved estimated clinical time saved [<xref ref-type="bibr" rid="ref28">28</xref>], ease of revision [<xref ref-type="bibr" rid="ref44">44</xref>], and clinical use [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
          <p>Blind analysis was reported in 6 (20%) publications. The assessor recruitment process was documented by Jiang et al [<xref ref-type="bibr" rid="ref33">33</xref>] and Li et al [<xref ref-type="bibr" rid="ref21">21</xref>] (n=2, 7%), who specified including volunteers and invitees, respectively. Assessors’ affiliation, reported in 16 (53%) studies, varied. Most studies (n=14, 47%) included at least 1 in-domain physician. The inclusion of several participants in the manual review process was common (n=14, 47%), with 9 (30%) studies specifically involving at least 2 in-domain physicians.</p>
          <p>Ten studies (33%, 10/30) reported at least 2 raters per document. Interannotator agreement score was reported in 4 (13%) studies and included intraclass correlation [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], Pearson <italic>r</italic> of human evaluation scores [<xref ref-type="bibr" rid="ref44">44</xref>], and Cohen κ [<xref ref-type="bibr" rid="ref26">26</xref>]. The use of measurement scales, documented in 14 papers (47%), involved mainly numeric scores (n=7, 23%) and Likert Scale (n=6, 20%). Additionally, Cai et al [<xref ref-type="bibr" rid="ref44">44</xref>] used a scoring scale [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
          <p>Three (10%) studies mentioned following a specific protocol for human evaluation: Chien et al [<xref ref-type="bibr" rid="ref39">39</xref>] applied the approach proposed by Goldstein et al [<xref ref-type="bibr" rid="ref52">52</xref>], and López et al [<xref ref-type="bibr" rid="ref31">31</xref>] and Searle et al [<xref ref-type="bibr" rid="ref26">26</xref>] followed the protocol SummEval proposed by Fabbri et al [<xref ref-type="bibr" rid="ref53">53</xref>]. The study setup was documented in 6 (20%) studies, where authors described the tools used in the research process, such as interfaces and questionnaires.</p>
          <p>Summary ranking methods were primarily independent (n=19, 63%), where items are evaluated independently without direct comparison to one another. In contrast, López et al [<xref ref-type="bibr" rid="ref31">31</xref>] used a pairwise comparison approach, where items are evaluated by directly comparing 2 options at a time, allowing for a more relative assessment of rankings.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This scoping review offers a comprehensive overview of the state of research on clinical text summarization using LLMs. Concerns have been raised about the applicability of certain machine learning models in clinical settings, where their effectiveness failed to meet real-world expectations [<xref ref-type="bibr" rid="ref54">54</xref>-<xref ref-type="bibr" rid="ref56">56</xref>]. Our analysis suggests that similar challenges may extend to the field of automated summarization. The following sections discuss findings, highlight key barriers in translating research findings into practical clinical applications, and propose directions for future research.</p>
        <sec>
          <title>A Narrow Research Scope</title>
          <p>Current research focuses on a limited range of summarization objectives, patient populations, and medical specialties. Most studies (n=17, 57%) focused on radiology reports, particularly the impression section (n=15, 50%). Study populations were predominantly ICU patients (n=15, 50%), from US-based institutions (n=19, 63%). Language coverage was predominantly English (n=26, 87%). MIMIC, an open-source dataset originating from the ICU of Beth Israel Deaconess Medical Center, was used in 50% of studies (<xref rid="figure4" ref-type="fig">Figure 4</xref>) [<xref ref-type="bibr" rid="ref57">57</xref>].</p>
          <p>The limited research scope, combined with the heavy reliance on a few publicly available datasets, raises concerns about whether research is driven by clinical needs or data availability. ICU patients constitute only a small subset of the broader health care population encountered in routine clinical practice, limiting the generalizability of findings. The practical necessity of automating the impression section in radiology reports is debatable [<xref ref-type="bibr" rid="ref58">58</xref>].</p>
          <p>Additionally, open-source datasets fail to capture real-world complexities such as variations in writing styles, clinical workflows, and patient populations [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>]. As a result, many potential applications remain yet unexplored. Future research should ensure alignment with clinical needs, and expand its scope to include a wider range of use cases, medical specialties, patient populations, and language coverage (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>).</p>
          <boxed-text id="box2" position="float">
            <title>Key research priorities in study elaboration for large language model (LLM)–based summarization. This textbox highlights critical areas for refining and expanding research methodologies to improve clinical validity, ethical and legal compliance, and practical deployment of summarization models in health care.</title>
            <p>
              <bold>Research objectives</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Ensure research objectives directly address real-world clinical challenges.</p>
              </list-item>
              <list-item>
                <p>Expand the scope of investigation by diversifying use cases, medical specialties, and patient populations to enhance practical applicability.</p>
              </list-item>
              <list-item>
                <p>Clearly define the context of summarization objectives, including purpose, target audience, and expected outcomes, to ensure a clinically valid evaluation.</p>
              </list-item>
            </list>
            <p>
              <bold>Summarization methodology</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Expand knowledge of both the performance and limitations of LLMs by evaluating their ability to handle real-world complexities, such as multisource summarization.</p>
              </list-item>
            </list>
            <p>
              <bold>Model performance and clinical impact evaluation</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Conduct a context-aware validation of the model (or task-specific evaluation) through human assessment.</p>
              </list-item>
              <list-item>
                <p>Assess model generalizability via external validation, subcategory testing, and k-fold cross-validation.</p>
              </list-item>
              <list-item>
                <p>Evaluate model utility by analyzing its impact on the intended audience to determine whether summarization systems effectively fulfill their clinical purpose.</p>
              </list-item>
              <list-item>
                <p>Perform failure analysis and subsequent patient safety risk assessment to evaluate the clinical impact of identified errors on patient safety.</p>
              </list-item>
              <list-item>
                <p>Conduct bias analysis by assessing outputs for discriminative language, including bias related to gender, race, socioeconomic status, substance use, and mental health, to ensure fairness.</p>
              </list-item>
            </list>
            <p>
              <bold>Address data security and privacy</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Ensure compliance with legal agreements, and implement data protection strategies such as on-premises deployment to safeguard patient privacy and regulatory standards.</p>
              </list-item>
            </list>
          </boxed-text>
          <p>Another limitation observed in this research is the lack of a clear description of summarization objectives. For example, Zhu et al [<xref ref-type="bibr" rid="ref40">40</xref>] described objectives as “to generate a few significant impressions” or “to generate a few critical diagnosis results” (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref49">49</xref>]). These descriptions lack precision, making it difficult to assess what constitutes a “significant” or “critical” diagnosis result, and how the output should be evaluated. As such, comparing studies and assessing model effectiveness becomes difficult.</p>
          <p>Poorly defined research objectives can also lead to irrelevant research or fail to ensure proper model validation. For example, Helwan et al [<xref ref-type="bibr" rid="ref30">30</xref>] identified patients as the target audience and summarizing and simplifying radiology reports as the summarization objective. In contrast, their application focused on generating the impression section of radiology reports primarily meant for communication between the ordering physician and the radiologist [<xref ref-type="bibr" rid="ref61">61</xref>]. This misalignment between the stated objective and the actual application further highlights the need for greater clarity in research goals.</p>
          <p>Future research should define summarization objectives, specifying purpose, target audience, and expected outcomes, to ensure a clinically relevant evaluation by aligning assessment criteria with intended clinical applications (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>). Additionally, studies should provide a detailed text corpus description, ensuring clear and consistent terminology across regions, disciplines, and institutions to facilitate comparability across studies (<xref ref-type="boxed-text" rid="box3">Textbox 3</xref>).</p>
          <boxed-text id="box3" position="float">
            <title>Reporting recommendations specific to the large language model (LLM)–based summarization research.</title>
            <p>
              <bold>Research objective</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Provide a thorough description of the summarization objective studied, giving a precise understanding of its purpose, intended audience, and expected outcome.</p>
              </list-item>
            </list>
            <p>
              <bold>Summarization methodology and model evaluation</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Provide a clear and detailed description of the text corpus used, ensuring that terminology is well-defined and understandable across different regions, disciplines, and institutions.</p>
              </list-item>
              <list-item>
                <p>Specify the structure of the input test data (structured and unstructured).</p>
              </list-item>
              <list-item>
                <p>Report the number of documents processed by the model (single, multiple, and single-multiple).</p>
              </list-item>
              <list-item>
                <p>Detail the summarization technique used (abstractive, extractive, and hybrid).</p>
              </list-item>
              <list-item>
                <p>Specify the model deployment environment (on-premises and externally).</p>
              </list-item>
              <list-item>
                <p>Provide demographic information on the dataset used for LLM development, tuning, or evaluation.</p>
              </list-item>
            </list>
            <p>
              <bold>Technological and cost requirements</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>Provide a detailed report on the computational resources needed for model deployment and outline the associated costs.</p>
              </list-item>
            </list>
          </boxed-text>
        </sec>
        <sec>
          <title>Limitations in Addressing Real-World Effectiveness</title>
          <p>Most studies used abstractive summarization (n=17, 57%), typically with single-document inputs (n=4, 13%) and unstructured data (n=13, 43%). In terms of deployment methods, 10% (n=3) of models were deployed on-premises and 20% (n=6) used external cloud services. Furthermore, as depicted in <xref rid="figure4" ref-type="fig">Figure 4</xref>, there was notable underreporting across studies.</p>
          <p>These findings highlight concerns about model effectiveness, particularly in synthesizing multisource data and handling longitudinal records. In clinical practice, physicians summarize patient data from single or multiple sources using different perspectives for decision-making, communication, or documentation. However, only Searle et al [<xref ref-type="bibr" rid="ref26">26</xref>] specified multidocument summarization, while Chien et al [<xref ref-type="bibr" rid="ref39">39</xref>] mentioned a single-multiple document approach. Multidocument summarization presents additional challenges, such as maintaining coherence, reducing redundancy, and ensuring consistency [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>]. Furthermore, temporality remains a key obstacle, as clinical narratives span longitudinal records. Chien et al [<xref ref-type="bibr" rid="ref39">39</xref>] suggested that overcoming this challenge may require dedicated models for temporal relation extraction to accurately capture event sequencing. Thus, the full spectrum of summarization performance remains largely unexplored. Future research should deepen insights into LLM strengths and weaknesses by assessing their ability to manage real-world challenges, including multisource summarization (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>).</p>
          <p>Additionally, concerns persist regarding the real-world applicability of current summarization models due to deployment constraints. On-premises deployment presents hardware limitations that must be addressed to maintain optimal performance and accuracy, whereas cloud-based models necessitate stringent data privacy measures and compliance with regulatory frameworks [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>]. Without systematic reporting on these deployment considerations, it remains uncertain whether summarization models can be practically integrated into health care workflows. Identifying practical strategies for real-world implementation is essential to bridge the gap between research and clinical application.</p>
          <p>Finally, the widespread under-reporting across studies undermines the validity of performance assessments (<xref rid="figure4" ref-type="fig">Figure 4</xref>), making it difficult to compare models, reproduce results, and evaluate their real-world feasibility. To address this, future research must adopt comprehensive reporting guidelines, such as the Transparent Reporting of a multivariable prediction model for Individual Prognosis or Diagnosis (TRIPOD)+LLM checklist, which extends the original TRIPOD guidelines to ensure standardized reporting in LLM-based health care research [<xref ref-type="bibr" rid="ref66">66</xref>]. Additionally, <xref ref-type="boxed-text" rid="box3">Textbox 3</xref> outlines reporting recommendations specific to summarization research. Enhancing transparency in reporting will improve integrity, enable meaningful cross-study comparisons, and ultimately support the development of summarization models that are both clinically relevant and practically deployable.</p>
        </sec>
        <sec>
          <title>Challenges and Limitations in Model Evaluation</title>
          <p>Evaluation frameworks varied widely in the strategies and metrics used (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref49">49</xref>]). All studies conducted internal validation, primarily with automatic metrics (n=26, 87%), while 67% (n=20) included human evaluation. Few studies conducted external validation (n=2, 7%), failure analysis (n=6, 20%), or patient safety risk assessments (n=1, 3%), and none performed bias evaluation.</p>
        </sec>
        <sec>
          <title>Challenges in Ensuring Reliable Evaluations</title>
          <p>Effectively evaluating generative outputs in summarization tasks remains a challenge, as it requires evaluating both textual quality and contextual appropriateness [<xref ref-type="bibr" rid="ref66">66</xref>]. Human evaluation remains the gold standard for text summarization, offering the contextual insight necessary for accurate summary assessment [<xref ref-type="bibr" rid="ref67">67</xref>-<xref ref-type="bibr" rid="ref69">69</xref>]. Automatic validation metrics, however, lack contextual understanding and therefore cannot reliably determine relevance, correlate poorly with human judgment, and are considered underinformative [<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref70">70</xref>].</p>
          <p>Findings suggest that a notable portion of studies (33%) may lack a clinically meaningful assessment of model performance, as they relied solely on automatic validation metrics. Several studies reported limitations of automatic validation metrics in summary evaluation. Cai et al [<xref ref-type="bibr" rid="ref44">44</xref>] and Hartmann et al [<xref ref-type="bibr" rid="ref45">45</xref>] noted that these metrics were not adequate to capture summary quality, while Liang et al [<xref ref-type="bibr" rid="ref29">29</xref>] and Zhao et al [<xref ref-type="bibr" rid="ref43">43</xref>] reported that these metrics did not evaluate the clinical validity (or usability) of summaries. As no consensus is reached on the reliability of automatic validation metrics in evaluating abstractive summarization, several studies have proposed combining automatic metrics with human evaluation and conducting correlation analyses to better assess their validity and practical value [<xref ref-type="bibr" rid="ref66">66</xref>].</p>
          <p>Further limitations were observed among the 20 studies conducting human evaluations: 15% (3/20) did not report the number of assessors [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref43">43</xref>], 20% (4/20) did not specify their assessors’ affiliations [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>], 90% (18/20) did not specify their assessors’ recruitment processes, and 70% (14/20) of studies lacked blinded analysis. While 50% (10/20) of studies reported dual annotations, only 20% (4/20) reported interannotator agreement. Addressing these issues is essential to ensure reliable performance assessment. Although no standardized best practices for human evaluation in text summarization currently exist, research in this area is progressing in a promising direction. Tam et al [<xref ref-type="bibr" rid="ref71">71</xref>] proposed an evaluation framework aimed at enhancing reliability, generalizability, and applicability in human evaluation practices. Similarly, Van der Lee et al [<xref ref-type="bibr" rid="ref67">67</xref>] introduced a set of best practices for the manual review of automatically generated text, contributing to the development of more structured and consistent evaluation methodologies.</p>
        </sec>
        <sec>
          <title>Challenges in Evaluating the Robustness of Model Performance</title>
          <p>Assessing model robustness is crucial, as clinical settings differ in patient populations, writing styles, and medical practices. However, several issues were identified in the studies reviewed, particularly regarding the depth and comprehensiveness of the evaluation processes.</p>
          <p>The sample sizes used in human evaluation were limited. Most studies assessed model performance on as few as 2 to 50 documents. Since LLMs generate nondeterministic outputs, results can vary between iterations [<xref ref-type="bibr" rid="ref72">72</xref>]. Previous work by Tam et al [<xref ref-type="bibr" rid="ref71">71</xref>] suggests that at least 130 documents should be evaluated when testing clinical decision support tools to enable meaningful performance assessments.</p>
          <p>In addition, model generalizability, referring to the model’s ability to perform effectively across diverse clinical settings, populations, or conditions beyond those on which it was originally trained [<xref ref-type="bibr" rid="ref55">55</xref>], was frequently overlooked in the analyzed studies. External validation, essential for detecting overfitting and assessing cross-site transportability [<xref ref-type="bibr" rid="ref73">73</xref>], was only reported by Kondadadi et al [<xref ref-type="bibr" rid="ref35">35</xref>] and Dai et al [<xref ref-type="bibr" rid="ref46">46</xref>]. Similarly, subgroup performance analysis was conducted in another 2 studies: Liang et al [<xref ref-type="bibr" rid="ref29">29</xref>] evaluated cancer stage-specific performance using subpopulation testing, and Van Veen et al [<xref ref-type="bibr" rid="ref47">47</xref>] conducted out-of-distribution testing, which involves evaluating the model on underrepresented samples of the dataset [<xref ref-type="bibr" rid="ref51">51</xref>]. Additionally, 5-fold cross-validation, which involves partitioning the training data into different subsets to ensure consistent model performance, was only used by Li et al [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
          <p>Assessing model generalizability involves understanding how data variability affects model performance [<xref ref-type="bibr" rid="ref74">74</xref>]. Distribution shifts in training data can lead to underperformance in underrepresented populations while causing overfitting in overrepresented subgroups [<xref ref-type="bibr" rid="ref51">51</xref>]. As such, while models may demonstrate high performance within their training environments, their ability to generalize to broader clinical settings remains uncertain.</p>
          <p>To ensure robustness, future research should prioritize generalizability assessments, including rigorous out-of-distribution testing and subcategory analysis within the same dataset, even when access to multiple external datasets is limited. Moreover, systematic reporting of demographic characteristics is essential for enhancing transparency and ensuring models are evaluated across diverse populations (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>) [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref75">75</xref>,<xref ref-type="bibr" rid="ref76">76</xref>].</p>
        </sec>
        <sec>
          <title>Limitations in Addressing the Clinical Impact of Summarization Models</title>
          <sec>
            <title>Clinical Utility</title>
            <p>Few studies assessed the clinical impact of LLM-based summarization. Wu et al [<xref ref-type="bibr" rid="ref28">28</xref>] evaluated perceived benefits by clinicians, focusing on estimated time savings. Cai et al [<xref ref-type="bibr" rid="ref44">44</xref>] examined the ease of revision, while Zhao et al [<xref ref-type="bibr" rid="ref43">43</xref>] assessed the model’s clinical utility.</p>
            <p>LLMs are expected to reduce clinician workload and improve information synthesis, raising high adoption expectations [<xref ref-type="bibr" rid="ref75">75</xref>]. However, beyond assessing their technical performance, a thorough evaluation of both their utility and risks is crucial to inform responsible implementation and compliance with legal standards [<xref ref-type="bibr" rid="ref77">77</xref>,<xref ref-type="bibr" rid="ref78">78</xref>]. Future studies should determine whether summarization systems effectively serve their intended clinical purpose [<xref ref-type="bibr" rid="ref77">77</xref>]. Additionally, since all studies included in this review were retrospective, prospective studies could provide a deeper understanding of the actual impact of LLM-based summarization models in clinical workflows [<xref ref-type="bibr" rid="ref56">56</xref>].</p>
          </sec>
          <sec>
            <title>Patient Safety Risks</title>
            <p>The safety and potential clinical harm of evaluated models remained frequently unaddressed. Failure analysis was conducted in only 20% (n=6) of studies, with Van Veen et al [<xref ref-type="bibr" rid="ref1">1</xref>] being the only study to examine patient safety risks.</p>
            <p>Without a clear understanding of failure patterns, it becomes inherently difficult both to assess their impact on clinical decision-making and patient safety and to develop effective safeguards to mitigate potential risks. While recent studies have attempted to establish taxonomies for hallucinations [<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref80">80</xref>], failures may be inherently task-specific, underscoring the need for systematic error analysis and categorization based on existing research. Additionally, their impact on patient safety should be evaluated using appropriate risk analysis methods (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>).</p>
            <p>Although no standardized risk assessment framework currently exists, a step toward addressing this gap is the risk matrix-based evaluation framework introduced by Asgari et al [<xref ref-type="bibr" rid="ref81">81</xref>], which provides a structured methodology for categorizing and quantifying errors, enabling a systematic assessment of their impact on patient safety. Establishing standardized methodologies for failure analysis and risk assessment will be essential to ensure the safe and effective deployment of these models in clinical practice.</p>
          </sec>
          <sec>
            <title>Bias and Fairness</title>
            <p>LLMs risk exacerbating health disparities [<xref ref-type="bibr" rid="ref82">82</xref>] as they absorb intrinsic biases during training on diverse data sources. These biases can manifest as harms in specific downstream tasks, impacting clinical decision-making [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref83">83</xref>]. For example, Zack et al [<xref ref-type="bibr" rid="ref84">84</xref>] identified stereotypical demographic representations in LLM-generated diagnostic and treatment recommendations related to sex, ethnicity, and race. As a generative process, abstractive summarization also raises the potential to perpetuate these biases [<xref ref-type="bibr" rid="ref83">83</xref>].</p>
            <p>Despite these concerns, no study reported a bias assessment. Future research should use diverse datasets, prioritize bias detection, particularly in identifying discriminative language in model outputs, and develop mitigation strategies to ensure fairness in clinical decision support (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>) [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref75">75</xref>,<xref ref-type="bibr" rid="ref76">76</xref>].</p>
          </sec>
          <sec>
            <title>Data Privacy and Security</title>
            <p>Ensuring data privacy and security is essential for ethical and legal compliance in research, and to enable valid and reproducible studies that can inform the responsible adoption of LLMs for clinical summarization. However, several studies used proprietary models to process patient datasets, raising concerns about data privacy and regulatory compliance.</p>
            <p>For example, Van Veen et al [<xref ref-type="bibr" rid="ref1">1</xref>], Ma et al [<xref ref-type="bibr" rid="ref36">36</xref>], and Li et al [<xref ref-type="bibr" rid="ref21">21</xref>] used the MIMIC series; however, only Van Veen et al [<xref ref-type="bibr" rid="ref1">1</xref>] explicitly reported using the Azure OpenAI API. This raises concerns as the PhysioNet Credentialed Data Use Agreement explicitly prohibits sharing credentialed datasets (eg, MIMIC-III, MIMIC-IV, MIMIC-CXR) with third-party AI services, such as OpenAI APIs [<xref ref-type="bibr" rid="ref85">85</xref>]. Chien et al [<xref ref-type="bibr" rid="ref39">39</xref>] and Wang et al [<xref ref-type="bibr" rid="ref41">41</xref>] used deidentified proprietary datasets. However, Wang et al [<xref ref-type="bibr" rid="ref41">41</xref>] used these datasets without IRB approval, raising ethical and regulatory concerns regarding research oversight, patient privacy, and compliance with data protection standards.</p>
            <p>Model selection must balance multiple factors, including performance, regulatory compliance, ethical considerations, and socioeconomic factors. Proprietary models, such as GPT-4, are attractive as they offer strong performance, ease of use, and cost-efficiency. However, there are currently no clear regulatory guidelines or ethical consensus on handling patient information in private models [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref86">86</xref>].</p>
            <p>While Ma et al [<xref ref-type="bibr" rid="ref36">36</xref>] argue that deidentification sufficiently protects patient privacy, studies have demonstrated that deidentified data could be reidentified, raising concerns about its reliability as a sole privacy safeguard [<xref ref-type="bibr" rid="ref87">87</xref>]. Despite ongoing advancements in data protection methods, vulnerabilities persist, with emerging privacy attacks continuously exposing weaknesses in data protection measures [<xref ref-type="bibr" rid="ref88">88</xref>].</p>
            <p>Given these challenges, future research must explicitly disclose data-sharing practices, ensure compliance with legal agreements, and adopt privacy-preserving strategies, such as on-premises deployment (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>). Locally installed open-source solutions provide a controlled environment tailored to institutional needs, ensuring strict data privacy compliance while supporting valid, reproducible studies that can lead to practical solutions for reducing clinicians’ workload.</p>
          </sec>
          <sec>
            <title>Technological and Cost Requirements</title>
            <p>The successful deployment of LLMs also depends on their computational resource requirements and cost implications. While most studies reported hardware and memory requirements, none provided a detailed cost analysis.</p>
            <p>Without these insights, health care institutions may struggle to assess the feasibility of implementing LLM-based solutions. Future research should go beyond hardware specifications to evaluate the economic impact of deployment to ensure that LLM adoption is both technically and financially sustainable in clinical settings (<xref ref-type="boxed-text" rid="box3">Textbox 3</xref>).</p>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>The data extraction process was conducted independently by 3 authors, which enhances the accuracy and reliability of the results. By aligning with the PRISMA-ScR checklist, we ensure transparency throughout the review, allowing for a coherent and well-documented process.</p>
        <p>However, several limitations must be acknowledged. First, to finalize the data extraction table, we randomly assessed 3 studies, following the methodology outlined by Pollock et al [<xref ref-type="bibr" rid="ref89">89</xref>]. This may have introduced bias in defining extracted variables. Second, certain aspects of data extraction were not fully explored in this study, such as the architecture types of models and their performance. This decision was made to maintain a broader focus rather than a highly technical approach. Additionally, providing a detailed performance evaluation was not meaningful due to the limitations mentioned in the discussion. Third, as authors were not contacted, reliance on published information alone may have introduced bias, particularly where methodological details were incomplete or underreported. Finally, the rapid pace of new research may result in some emerging studies being missed, and relevant sources may be inaccessible due to publication bias or their status as preprints.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This scoping review highlights key barriers to translating research advancements into practical applications, indicating that the field is still in its early stages. Research remains limited in scope, often shaped by dataset availability rather than explicitly guided by clinical needs, leaving many potential applications unexplored. Performance assessments frequently lack reliability and robustness, making it difficult to accurately evaluate model effectiveness. Furthermore, clinical impact evaluations remain insufficient, raising concerns about model utility, potential risks, fairness, data privacy, and broader technological and cost implications.</p>
        <p>To advance this field, future research must broaden its scope, strengthen methodological transparency, and improve the reliability of evaluation frameworks. Additionally, enhancing model robustness and conducting comprehensive clinical impact assessments will be essential for determining the practical value of LLM-based summarization.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) checklist.</p>
        <media xlink:href="jmir_v27i1e68998_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 404 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Search queries from databases (as of June 18, 2024).</p>
        <media xlink:href="jmir_v27i1e68998_app2.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Data extraction table.</p>
        <media xlink:href="jmir_v27i1e68998_app3.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 190 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Summary of input sources and corresponding summarization objectives for each publication.</p>
        <media xlink:href="jmir_v27i1e68998_app4.docx" xlink:title="DOCX File , 53 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Additional information on results: test set size and specific evaluation metrics reported for each publication.</p>
        <media xlink:href="jmir_v27i1e68998_app5.docx" xlink:title="DOCX File , 24 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">IRB</term>
          <def>
            <p>institutional review board</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">MIMIC</term>
          <def>
            <p>Medical Information Mart for Intensive Care</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PRISMA</term>
          <def>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PRISMA-ScR</term>
          <def>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">TRIPOD</term>
          <def>
            <p>Transparent Reporting of a multivariable prediction model for Individual Prognosis or Diagnosis</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="con">
        <p>LB conceptualized the study, developed the methodology, extracted and analyzed the data, wrote the first draft, and reviewed and edited the manuscript. DR conceptualized the study, developed the methodology, wrote the first draft, and reviewed the manuscript. CGB conceptualized the study, extracted and analyzed the data, and reviewed the manuscript. AKE extracted and analyzed the data, wrote the first draft, and reviewed the manuscript. YZ, AB, and MB conceptualized the study and reviewed the manuscript. CL supervised the review. All authors have full access to all the data in the study and take responsibility for the integrity of the data and the accuracy of the data analysis. All authors gave final approval.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Veen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Uden</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Blankemeier</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Delbrouck</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Aali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bluethgen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pareek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Polacin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Reis</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Seehofnerová</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rohatgi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hosamani</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ahuja</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Hom</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gatidis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pauly</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhari</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Adapted large language models can outperform medical experts in clinical text summarization</article-title>
          <source>Nat Med</source>
          <year>2024</year>
          <volume>30</volume>
          <issue>4</issue>
          <fpage>1134</fpage>
          <lpage>1142</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-02855-5</pub-id>
          <pub-id pub-id-type="medline">38413730</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-02855-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC11479659</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Keszthelyi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gaudet-Blavignac</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bjelogrlic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lovis</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Patient information summarization in clinical settings: scoping review</article-title>
          <source>JMIR Med Inform</source>
          <year>2023</year>
          <volume>11</volume>
          <fpage>e44639</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2023/1/e44639/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/44639</pub-id>
          <pub-id pub-id-type="medline">38015588</pub-id>
          <pub-id pub-id-type="pii">v11i1e44639</pub-id>
          <pub-id pub-id-type="pmcid">PMC10716777</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schnipper</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fitall</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gale</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Approach to improving patient safety: Communication</article-title>
          <source>PSNet</source>
          <year>2020</year>
          <access-date>2024-09-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://psnet.ahrq.gov/perspective/approach-improving-patient-safety-communication">https://psnet.ahrq.gov/perspective/approach-improving-patient-safety-communication</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birhane</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kasirzadeh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Leslie</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wachter</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Science in the age of large language models</article-title>
          <source>Nat Rev Phys</source>
          <year>2023</year>
          <volume>5</volume>
          <issue>5</issue>
          <fpage>277</fpage>
          <lpage>280</lpage>
          <pub-id pub-id-type="doi">10.1038/s42254-023-00581-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meskó</surname>
              <given-names>Bertalan</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>The imperative for regulatory oversight of large language models (or generative AI) in healthcare</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <month>07</month>
          <day>06</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>120</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-023-00873-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00873-0</pub-id>
          <pub-id pub-id-type="medline">37414860</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00873-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC10326069</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Redundancy in electronic health record corpora: analysis, impact on text mining performance and mitigation strategies</article-title>
          <source>BMC Bioinformatics</source>
          <year>2013</year>
          <month>01</month>
          <day>16</day>
          <volume>14</volume>
          <fpage>10</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-10"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-14-10</pub-id>
          <pub-id pub-id-type="medline">23323800</pub-id>
          <pub-id pub-id-type="pii">1471-2105-14-10</pub-id>
          <pub-id pub-id-type="pmcid">PMC3599108</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feblowitz</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Samal</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sittig</surname>
              <given-names>DF</given-names>
            </name>
          </person-group>
          <article-title>Summarization of clinical information: a conceptual model</article-title>
          <source>J Biomed Inform</source>
          <year>2011</year>
          <month>08</month>
          <volume>44</volume>
          <issue>4</issue>
          <fpage>688</fpage>
          <lpage>699</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(11)00059-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2011.03.008</pub-id>
          <pub-id pub-id-type="medline">21440086</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(11)00059-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thirunavukarasu</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DSJ</given-names>
            </name>
            <name name-style="western">
              <surname>Elangovan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gutierrez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DSW</given-names>
            </name>
          </person-group>
          <article-title>Large language models in medicine</article-title>
          <source>Nat Med</source>
          <year>2023</year>
          <month>08</month>
          <volume>29</volume>
          <issue>8</issue>
          <fpage>1930</fpage>
          <lpage>1940</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id>
          <pub-id pub-id-type="medline">37460753</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-023-02448-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singhal</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Azizi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mahdavi</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Scales</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tanwani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cole-Lewis</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pfohl</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Payne</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Seneviratne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gamble</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Babiker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schärli</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chowdhery</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mansfield</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Agüera Y Arcas</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Webster</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Matias</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gottweis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tomasev</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rajkomar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barral</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Semturs</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikesalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Natarajan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Large language models encode clinical knowledge</article-title>
          <source>Nature</source>
          <year>2023</year>
          <volume>620</volume>
          <issue>7972</issue>
          <fpage>172</fpage>
          <lpage>180</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37438534"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41586-023-06291-2</pub-id>
          <pub-id pub-id-type="medline">37438534</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41586-023-06291-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC10396962</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clusmann</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kolbinger</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>Muti</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Carrero</surname>
              <given-names>ZI</given-names>
            </name>
            <name name-style="western">
              <surname>Eckardt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laleh</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Löffler</surname>
              <given-names>CML</given-names>
            </name>
            <name name-style="western">
              <surname>Schwarzkopf</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Unger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Veldhuizen</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kather</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>The future landscape of large language models in medicine</article-title>
          <source>Commun Med</source>
          <year>2023</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>141</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s43856-023-00370-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s43856-023-00370-1</pub-id>
          <pub-id pub-id-type="medline">37816837</pub-id>
          <pub-id pub-id-type="pii">10.1038/s43856-023-00370-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC10564921</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marinello</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Di Cianni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Del Bianco</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mattioli</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sota</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cantarini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Emmi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Leccese</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lopalco</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mosca</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Padula</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Piga</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Salvarani</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Taruscio</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Talarico</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Empowering patients in the therapeutic decision-making process: a glance into Behçet's syndrome</article-title>
          <source>Front Med</source>
          <year>2021</year>
          <volume>8</volume>
          <fpage>769870</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34966756"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fmed.2021.769870</pub-id>
          <pub-id pub-id-type="medline">34966756</pub-id>
          <pub-id pub-id-type="pmcid">PMC8710680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bednarczyk</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Figure 1</article-title>
          <source>BioRender</source>
          <year>2025</year>
          <access-date>2025-04-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://app.biorender.com/citation/68078b899c45b69d2a8640db">https://app.biorender.com/citation/68078b899c45b69d2a8640db</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>The application of large language models in medicine: a scoping review</article-title>
          <source>iScience</source>
          <year>2024</year>
          <volume>27</volume>
          <issue>5</issue>
          <fpage>109713</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-0042(24)00935-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.isci.2024.109713</pub-id>
          <pub-id pub-id-type="medline">38746668</pub-id>
          <pub-id pub-id-type="pii">S2589-0042(24)00935-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC11091685</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pillai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Paget</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Naugler</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Assessing the research landscape and clinical utility of large language models: a scoping review</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2024</year>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>72</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-024-02459-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-024-02459-6</pub-id>
          <pub-id pub-id-type="medline">38475802</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-024-02459-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC10936025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bedi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Orr-Ewing</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dash</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Koyejo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fries</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Wornow</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Swaminathan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kashyap</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chaurasia</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tazbaz</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Milstein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pfeffer</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Testing and evaluation of health care applications of large language models: a systematic review</article-title>
          <source>JAMA</source>
          <year>2025</year>
          <volume>333</volume>
          <issue>4</issue>
          <fpage>319</fpage>
          <lpage>328</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2024.21700</pub-id>
          <pub-id pub-id-type="medline">39405325</pub-id>
          <pub-id pub-id-type="pii">2825147</pub-id>
          <pub-id pub-id-type="pmcid">PMC11480901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pressman</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Borna</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez-Cabello</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Haider</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Haider</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Forte</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Clinical and surgical applications of large language models: a systematic review</article-title>
          <source>J Clin Med</source>
          <year>2024</year>
          <volume>13</volume>
          <issue>11</issue>
          <fpage>3041</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jcm13113041"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jcm13113041</pub-id>
          <pub-id pub-id-type="medline">38892752</pub-id>
          <pub-id pub-id-type="pii">jcm13113041</pub-id>
          <pub-id pub-id-type="pmcid">PMC11172607</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of ChatGPT and other conversational large language models in healthcare</article-title>
          <source>MedRxiv. Preprint posted online on April 27, 2024</source>
          <year>2024</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38712148"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2024.04.26.24306390</pub-id>
          <pub-id pub-id-type="medline">38712148</pub-id>
          <pub-id pub-id-type="pii">2024.04.26.24306390</pub-id>
          <pub-id pub-id-type="pmcid">PMC11071576</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Attention is All you Need</article-title>
          <source>Advances in Neural Information Processing Systems</source>
          <year>2017</year>
          <access-date>2024-07-23</access-date>
          <publisher-loc>United Kingdom</publisher-loc>
          <publisher-name>Curran Associates, Inc</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper_files/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html">https://proceedings.neurips.cc/paper_files/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A systematic survey of text summarization: from statistical methods to large language models</article-title>
          <source>ACM Comput Surv</source>
          <year>2025</year>
          <pub-id pub-id-type="doi">10.1145/3731445</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wohlin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kalinowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Romero Felizardo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mendes</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Successful combination of database search and snowballing for identification of primary studies in systematic literature studies</article-title>
          <source>Inf Softw Technol</source>
          <year>2022</year>
          <volume>147</volume>
          <fpage>106908</fpage>
          <pub-id pub-id-type="doi">10.1016/j.infsof.2022.106908</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Early prediction of sepsis using chatGPT-generated summaries and structured data</article-title>
          <source>Multimed Tools Appl</source>
          <year>2024</year>
          <volume>83</volume>
          <issue>41</issue>
          <fpage>89521</fpage>
          <lpage>89543</lpage>
          <pub-id pub-id-type="doi">10.1007/s11042-024-18378-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goswami</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Prajapati</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Parameter-efficient fine-tuning large language model approach for hospital discharge paper summarization</article-title>
          <source>Appl Soft Comput</source>
          <year>2024</year>
          <volume>157</volume>
          <fpage>111531</fpage>
          <pub-id pub-id-type="doi">10.1016/j.asoc.2024.111531</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ajad</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Saini</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Niranjan</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Rad-Former: structuring radiology reports using transformers</article-title>
          <year>2023</year>
          <conf-name>5th International Conference on Recent Advances in Information Technology (RAIT)</conf-name>
          <conf-date>March 3, 2023</conf-date>
          <conf-loc>Dhanbad, India</conf-loc>
          <fpage>1</fpage>
          <lpage>6</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vinod</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Safar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mathew</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Venugopal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Joly</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Fine-tuning the BERTSUMEXT model for clinical report summarization</article-title>
          <year>2020</year>
          <conf-name>International Conference for Emerging Technology (INCET)</conf-name>
          <conf-date>June 5-7, 2020</conf-date>
          <conf-loc>Belgaum, India</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caterson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ambler</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Cereceda-Monteoliva</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Horner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Poacher</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>Application of generative language models to orthopaedic practice</article-title>
          <source>BMJ Open</source>
          <year>2024</year>
          <volume>14</volume>
          <issue>3</issue>
          <fpage>e076484</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=38485486"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2023-076484</pub-id>
          <pub-id pub-id-type="medline">38485486</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2023-076484</pub-id>
          <pub-id pub-id-type="pmcid">PMC10941106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Searle</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Teo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Discharge summary hospital course summarisation of in patient electronic health record text with clinical concept guided deep pre-trained transformer models</article-title>
          <source>J Biomed Inform</source>
          <year>2023</year>
          <volume>141</volume>
          <fpage>104358</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(23)00079-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2023.104358</pub-id>
          <pub-id pub-id-type="medline">37023846</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(23)00079-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alkhalaf</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Applying generative AI with retrieval augmented generation to summarize and extract key clinical information from electronic health records</article-title>
          <source>J Biomed Inform</source>
          <year>2024</year>
          <volume>156</volume>
          <fpage>104662</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(24)00080-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2024.104662</pub-id>
          <pub-id pub-id-type="medline">38880236</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(24)00080-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bibault</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Pilot applications of GPT-4 in radiation oncology: Summarizing patient symptom intake and targeted chatbot applications</article-title>
          <source>Radiother Oncol</source>
          <year>2024</year>
          <volume>190</volume>
          <fpage>109978</fpage>
          <pub-id pub-id-type="doi">10.1016/j.radonc.2023.109978</pub-id>
          <pub-id pub-id-type="medline">37913954</pub-id>
          <pub-id pub-id-type="pii">S0167-8140(23)89872-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kades</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Fink</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Fine-tuning BERT models for summarizing German radiology findings</article-title>
          <year>2022</year>
          <conf-name>Proceedings of the 4th Clinical Natural Language Processing Workshop</conf-name>
          <conf-date>July 1, 2022</conf-date>
          <conf-loc>Seattle, WA</conf-loc>
          <fpage>30</fpage>
          <lpage>40</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Helwan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Azar</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ozsahin</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Medical reports summarization using text-to-text transformer</article-title>
          <year>2023</year>
          <conf-name>Advances in Science and Engineering Technology International Conferences (ASET)</conf-name>
          <conf-date>February 20-23, 2023</conf-date>
          <conf-loc>Dubai, UAE</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>López-Úbeda</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Martín-Noguerol</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Díaz-Angulo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Luna</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of large language models performance against humans for summarizing MRI knee radiology reports: a feasibility study</article-title>
          <source>Int J Med Inform</source>
          <year>2024</year>
          <volume>187</volume>
          <fpage>105443</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105443</pub-id>
          <pub-id pub-id-type="medline">38615509</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(24)00106-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Modified bidirectional encoder representations from transformers extractive summarization model for hospital information systems based on character-level tokens (AlphaBERT): development and performance evaluation</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <volume>8</volume>
          <issue>4</issue>
          <fpage>e17787</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/4/e17787/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17787</pub-id>
          <pub-id pub-id-type="medline">32347806</pub-id>
          <pub-id pub-id-type="pii">v8i4e17787</pub-id>
          <pub-id pub-id-type="pmcid">PMC7221648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Learning to summarize Chinese radiology findings with a pre-trained encoder</article-title>
          <source>IEEE Trans Biomed Eng</source>
          <year>2023</year>
          <volume>70</volume>
          <issue>12</issue>
          <fpage>3277</fpage>
          <lpage>3287</lpage>
          <pub-id pub-id-type="doi">10.1109/TBME.2023.3280987</pub-id>
          <pub-id pub-id-type="medline">37314905</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>ChestXRayBERT: A pretrained language model for chest radiology report summarization</article-title>
          <source>IEEE Trans Multimedia</source>
          <year>2023</year>
          <volume>25</volume>
          <fpage>845</fpage>
          <lpage>855</lpage>
          <pub-id pub-id-type="doi">10.1109/tmm.2021.3132724</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kondadadi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manchanda</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ngo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McCormack</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Optum at MEDIQA 2021: Abstractive summarization of radiology reports using simple BART finetuning</article-title>
          <year>2021</year>
          <conf-name>Proceedings of the 20th Workshop on Biomedical Language Processing</conf-name>
          <conf-date>June 1, 2021</conf-date>
          <conf-loc>Virtual meeting</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>An iterative optimizing framework for radiology report summarization with ChatGPT</article-title>
          <source>IEEE Trans Artif Intell</source>
          <year>2024</year>
          <volume>99</volume>
          <fpage>1</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1109/tai.2024.3364586</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hartman</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>Bapat</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Weiner</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Navi</surname>
              <given-names>BB</given-names>
            </name>
            <name name-style="western">
              <surname>Sholle</surname>
              <given-names>ET</given-names>
            </name>
            <name name-style="western">
              <surname>Campion</surname>
              <given-names>TR</given-names>
            </name>
          </person-group>
          <article-title>A method to automate the discharge summary hospital course for neurology patients</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2023</year>
          <volume>30</volume>
          <issue>12</issue>
          <fpage>1995</fpage>
          <lpage>2003</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocad177</pub-id>
          <pub-id pub-id-type="medline">37639624</pub-id>
          <pub-id pub-id-type="pii">7252876</pub-id>
          <pub-id pub-id-type="pmcid">PMC10654848</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alambo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Banerjee</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Thirunarayan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cajita</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Improving the factual accuracy of abstractive clinical text summarization using multi-objective optimization</article-title>
          <source>Annu Int Conf IEEE Eng Med Biol Soc</source>
          <year>2022</year>
          <volume>2022</volume>
          <fpage>1615</fpage>
          <lpage>1618</lpage>
          <pub-id pub-id-type="doi">10.1109/EMBC48229.2022.9871798</pub-id>
          <pub-id pub-id-type="medline">36085755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jagessar</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nael</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salamon</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>AI-assisted summarization of radiologic reports: evaluating GPT3davinci, BARTcnn, LongT5booksum, LEDbooksum, LEDlegal, and LEDclinical</article-title>
          <source>AJNR Am J Neuroradiol</source>
          <year>2024</year>
          <volume>45</volume>
          <issue>2</issue>
          <fpage>244</fpage>
          <lpage>248</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://escholarship.org/uc/item/qt9h75s6r2"/>
          </comment>
          <pub-id pub-id-type="doi">10.3174/ajnr.A8102</pub-id>
          <pub-id pub-id-type="medline">38238092</pub-id>
          <pub-id pub-id-type="pii">ajnr.A8102</pub-id>
          <pub-id pub-id-type="pmcid">PMC11285993</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Leveraging summary guidance on medical report summarization</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2023</year>
          <volume>27</volume>
          <issue>10</issue>
          <fpage>5066</fpage>
          <lpage>5075</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2023.3304376</pub-id>
          <pub-id pub-id-type="medline">37566507</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Dou</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Performance and exploration of ChatGPT in medical examination, records and education in Chinese: pave the way for medical AI</article-title>
          <source>Int J Med Inform</source>
          <year>2023</year>
          <volume>177</volume>
          <fpage>105173</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2023.105173</pub-id>
          <pub-id pub-id-type="medline">37549499</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(23)00191-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McAuley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>EY</given-names>
            </name>
            <name name-style="western">
              <surname>Gentili</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>RadBERT: Adapting transformer-based language models to radiology</article-title>
          <source>Radiol Artif Intell</source>
          <year>2022</year>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e210258</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35923376"/>
          </comment>
          <pub-id pub-id-type="doi">10.1148/ryai.210258</pub-id>
          <pub-id pub-id-type="medline">35923376</pub-id>
          <pub-id pub-id-type="pmcid">PMC9344353</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>From softmax to nucleusmax: A novel sparse language model for Chinese radiology report summarization</article-title>
          <source>ACM Trans Asian Low Resour Lang Inf Process</source>
          <year>2023</year>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>1</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1145/3596219</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bajracharya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sills</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kapoor</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Berlowitz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Generation of patient after-visit summaries to support physicians</article-title>
          <year>2022</year>
          <conf-name>Proceedings of the 29th International Conference on Computational Linguistics</conf-name>
          <conf-date>October 1, 2022</conf-date>
          <conf-loc>Gyeongju, Republic of Korea</conf-loc>
          <fpage>6234</fpage>
          <lpage>6247</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2022.coling-1.544"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hartman</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Campion</surname>
              <given-names>TR</given-names>
            </name>
          </person-group>
          <article-title>A day-to-day approach for automating the hospital course section of the discharge summary</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2022</year>
          <volume>2022</volume>
          <fpage>216</fpage>
          <lpage>225</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35854728"/>
          </comment>
          <pub-id pub-id-type="medline">35854728</pub-id>
          <pub-id pub-id-type="pii">2327</pub-id>
          <pub-id pub-id-type="pmcid">PMC9285173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>BDKG at MEDIQA 2021: System report for the radiology report summarization task</article-title>
          <year>2021</year>
          <conf-name>Proceedings of the 20th Workshop on Biomedical Language Processing</conf-name>
          <conf-date>June 1, 2021</conf-date>
          <conf-loc>Virtual meeting</conf-loc>
          <fpage>103</fpage>
          <lpage>111</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Veen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Uden</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Attias</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pareek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bluethgen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Polacin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Delbrouck</surname>
              <given-names>J-B</given-names>
            </name>
            <name name-style="western">
              <surname>Chaves</surname>
              <given-names>JZ</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pauly</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>RadAdapt: Radiology report summarization via lightweight domain adaptation of large language models</article-title>
          <year>2023</year>
          <conf-name>The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</conf-name>
          <conf-date>July 1, 2023</conf-date>
          <conf-loc>Toronto, ON</conf-loc>
          <fpage>449</fpage>
          <lpage>460</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chuang</surname>
              <given-names>YN</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>SPeC: A soft prompt–based calibration on performance variability of large language model in clinical notes summarization</article-title>
          <source>J Biomed Inform</source>
          <year>2024</year>
          <volume>151</volume>
          <fpage>104606</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(24)00024-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2024.104606</pub-id>
          <pub-id pub-id-type="medline">38325698</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(24)00024-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC11608453</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dligach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Churpek</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Afshar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Summarizing patients' problems from hospital progress notes using pre-trained sequence-to-sequence models</article-title>
          <source>Proc Int Conf Comput Ling</source>
          <year>2022</year>
          <volume>2022</volume>
          <fpage>2979</fpage>
          <lpage>2991</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36268128"/>
          </comment>
          <pub-id pub-id-type="medline">36268128</pub-id>
          <pub-id pub-id-type="pmcid">PMC9581107</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Majumder</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Effective aggregation of various summarization techniques</article-title>
          <source>Inf Process Manag</source>
          <year>2018</year>
          <volume>54</volume>
          <issue>2</issue>
          <fpage>145</fpage>
          <lpage>158</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2017.11.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A survey on evaluation of out-of-distribution generalization</article-title>
          <source>ArXiv. Preprint posted on March 4, 2024</source>
          <year>2024</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2403.01874"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2403.01874</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shahar</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>An automated knowledge-based textual summarization system for longitudinal, multivariate clinical data</article-title>
          <source>J Biomed Inform</source>
          <year>2016</year>
          <volume>61</volume>
          <fpage>159</fpage>
          <lpage>175</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2016.03.022</pub-id>
          <pub-id pub-id-type="medline">27039119</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(16)30015-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fabbri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kryściński</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>McCann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Radev</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>SummEval: Re-evaluating summarization evaluation</article-title>
          <source>Trans Assoc Comput Linguist</source>
          <year>2021</year>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>391</fpage>
          <lpage>409</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2021.tacl-1.24/"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00373</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lenharo</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The testing of AI in medicine is a mess. Here's how it should be done</article-title>
          <source>Nature</source>
          <year>2024</year>
          <volume>632</volume>
          <issue>8026</issue>
          <fpage>722</fpage>
          <lpage>724</lpage>
          <pub-id pub-id-type="doi">10.1038/d41586-024-02675-0</pub-id>
          <pub-id pub-id-type="medline">39169244</pub-id>
          <pub-id pub-id-type="pii">10.1038/d41586-024-02675-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Soltan</surname>
              <given-names>AAS</given-names>
            </name>
            <name name-style="western">
              <surname>Clifton</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Machine learning generalizability across healthcare settings: insights from multi-site COVID-19 screening</article-title>
          <source>NPJ Digit Med</source>
          <year>2022</year>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>69</fpage>
          <pub-id pub-id-type="doi">10.1038/s41746-022-00614-9</pub-id>
          <pub-id pub-id-type="medline">35672368</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-022-00614-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC9174159</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikesalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Suleyman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Key challenges for delivering clinical impact with artificial intelligence</article-title>
          <source>BMC Med</source>
          <year>2019</year>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>195</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-019-1426-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12916-019-1426-2</pub-id>
          <pub-id pub-id-type="medline">31665002</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12916-019-1426-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6821018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AEW</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/sdata.2016.35"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Large language models for reducing clinicians' documentation burden</article-title>
          <source>Nat Med</source>
          <year>2024</year>
          <volume>30</volume>
          <issue>4</issue>
          <fpage>942</fpage>
          <lpage>943</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-02888-w</pub-id>
          <pub-id pub-id-type="medline">38561439</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-02888-w</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Norori</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Aellen</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Faraci</surname>
              <given-names>FD</given-names>
            </name>
            <name name-style="western">
              <surname>Tzovara</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Addressing bias in big data and AI for health care: a call for open science</article-title>
          <source>Patterns</source>
          <year>2021</year>
          <volume>2</volume>
          <issue>10</issue>
          <fpage>100347</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://boris.unibe.ch/id/eprint/161897"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.patter.2021.100347</pub-id>
          <pub-id pub-id-type="medline">34693373</pub-id>
          <pub-id pub-id-type="pii">S2666-3899(21)00202-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8515002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Röösli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bozkurt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Peeking into a black box, the fairness and generalizability of a MIMIC-III benchmarking model</article-title>
          <source>Sci Data</source>
          <year>2022</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41597-021-01110-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41597-021-01110-7</pub-id>
          <pub-id pub-id-type="medline">35075160</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41597-021-01110-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC8786878</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gershanik</surname>
              <given-names>EF</given-names>
            </name>
            <name name-style="western">
              <surname>Lacson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Khorasani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Critical finding capture in the impression section of radiology reports</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>465</fpage>
          <lpage>469</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22195100"/>
          </comment>
          <pub-id pub-id-type="medline">22195100</pub-id>
          <pub-id pub-id-type="pmcid">PMC3243237</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Supriyono</collab>
            <name name-style="western">
              <surname>Wibawa</surname>
              <given-names>AP</given-names>
            </name>
            <collab>Suyono</collab>
            <name name-style="western">
              <surname>Kurniawan</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>A survey of text summarization: techniques, evaluation and challenges</article-title>
          <source>Nat Lang Process J</source>
          <year>2024</year>
          <volume>7</volume>
          <fpage>100070</fpage>
          <pub-id pub-id-type="doi">10.1016/j.nlp.2024.100070</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shakil</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Farooq</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kalita</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Abstractive text summarization: State of the art, challenges, and improvements</article-title>
          <source>Neurocomputing</source>
          <year>2024</year>
          <volume>603</volume>
          <fpage>128255</fpage>
          <pub-id pub-id-type="doi">10.1016/j.neucom.2024.128255</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Magnuson</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Kingsley</surname>
              <given-names>TC</given-names>
            </name>
          </person-group>
          <article-title>Why Mayo Clinic is embracing the cloud and what this means for clinicians and researchers</article-title>
          <source>Mayo Clin Proc Innov Qual Outcomes</source>
          <year>2021</year>
          <volume>5</volume>
          <issue>6</issue>
          <fpage>969</fpage>
          <lpage>973</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2542-4548(21)00129-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.mayocpiqo.2021.08.010</pub-id>
          <pub-id pub-id-type="medline">34632298</pub-id>
          <pub-id pub-id-type="pii">S2542-4548(21)00129-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8488458</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chevrier</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Foufi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Gaudet-Blavignac</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Robert</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lovis</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Use and understanding of anonymization and de-identification in the biomedical literature: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>e13484</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/5/e13484/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13484</pub-id>
          <pub-id pub-id-type="medline">31152528</pub-id>
          <pub-id pub-id-type="pii">v21i5e13484</pub-id>
          <pub-id pub-id-type="pmcid">PMC6658290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gallifant</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Afshar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ameen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aphinyanaphongs</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cacciamani</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dligach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Daneshjou</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Landman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Moreno</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Munch</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Restrepo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Umeton</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gichoya</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>KGM</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Bitterman</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>The TRIPOD-LLM reporting guideline for studies using large language models</article-title>
          <source>Nat Med</source>
          <year>2025</year>
          <volume>31</volume>
          <issue>1</issue>
          <fpage>60</fpage>
          <lpage>69</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-03425-5</pub-id>
          <pub-id pub-id-type="medline">39779929</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-03425-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Lee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gatt</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van Miltenburg</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Krahmer</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Human evaluation of automatically generated text: current trends and best practice guidelines</article-title>
          <source>Comput Speech Lang</source>
          <year>2021</year>
          <volume>67</volume>
          <fpage>101151</fpage>
          <pub-id pub-id-type="doi">10.1016/j.csl.2020.101151</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krishna</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bransom</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kuehl</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Iyyer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dasigi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>LongEval: Guidelines for human evaluation of faithfulness in long-form summarization</article-title>
          <year>2023</year>
          <conf-name>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</conf-name>
          <conf-date>May 1, 2023</conf-date>
          <conf-loc>Dubrovnik, Croatia</conf-loc>
          <fpage>1650</fpage>
          <lpage>1669</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fabbri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Joty</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Revisiting the gold standard: grounding summarization evaluation with robust human evaluation</article-title>
          <year>2023</year>
          <conf-name>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>July 1, 2023</conf-date>
          <conf-loc>Toronto, Canada</conf-loc>
          <fpage>4140</fpage>
          <lpage>4170</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ladhak</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Durmus</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>McKeown</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hashimoto</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Benchmarking large language models for news summarization</article-title>
          <source>Trans Assoc Comput Linguist</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>39</fpage>
          <lpage>57</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2024.tacl-1.3/"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00632</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tam</surname>
              <given-names>TYC</given-names>
            </name>
            <name name-style="western">
              <surname>Sivarajkumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kapoor</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stolyar</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Polanska</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>McCarthy</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Osterhoudt</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Visweswaran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mathur</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cacciamani</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A framework for human evaluation of large language models in healthcare derived from literature review</article-title>
          <source>NPJ Digit Med</source>
          <year>2024</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>258</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01258-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01258-7</pub-id>
          <pub-id pub-id-type="medline">39333376</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01258-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC11437138</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>BY</given-names>
            </name>
          </person-group>
          <article-title>The good, the bad, and the greedy: Evaluation of LLMs should not ignore non-determinism</article-title>
          <source>ArXiv. Preprint posted on July 15, 2024</source>
          <year>2024</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2407.10457"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2407.10457</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cabitza</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Campagner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Soares</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>García de Guadiana-Romualdo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Challa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sulejmani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Seghezzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carobene</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The importance of being external. Methodological insights for the external validation of machine learning models in medicine</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2021</year>
          <volume>208</volume>
          <fpage>106288</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hdl.handle.net/10281/324839"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2021.106288</pub-id>
          <pub-id pub-id-type="medline">34352688</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(21)00362-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Futoma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Simons</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Panch</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi-Velez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>The myth of generalisability in clinical research and machine learning in health care</article-title>
          <source>Lancet Digit Health</source>
          <year>2020</year>
          <volume>2</volume>
          <issue>9</issue>
          <fpage>e489</fpage>
          <lpage>e492</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(20)30186-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(20)30186-2</pub-id>
          <pub-id pub-id-type="medline">32864600</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(20)30186-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7444947</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Evaluating large language models for use in healthcare: A framework for translational value assessment</article-title>
          <source>Inform Med Unlocked</source>
          <year>2023</year>
          <volume>41</volume>
          <fpage>101304</fpage>
          <pub-id pub-id-type="doi">10.1016/j.imu.2023.101304</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bommasani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Adeli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Arora</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>von Arx</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bernstein</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Bohg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bosselut</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brunskill</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Brynjolfsson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Buch</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Card</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Castellon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chatterji</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Creel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>JQ</given-names>
            </name>
            <name name-style="western">
              <surname>Demszky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Donahue</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Doumbouya</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Durmus</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ermon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Etchemendy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ethayarajh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Fei-Fei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Finn</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gale</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gillespie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Goel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>On the opportunities and risks of foundation models</article-title>
          <source>ArXiv. Preprint posted on July 12, 2022</source>
          <year>2022</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2108.07258"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Hond</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Leeuwenberg</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bartels</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>van Buchem</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kant</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>van Smeden</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>From text to treatment: The crucial role of validation for generative large language models in health care</article-title>
          <source>Lancet Digit Health</source>
          <year>2024</year>
          <volume>6</volume>
          <issue>7</issue>
          <fpage>e441</fpage>
          <lpage>e443</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(24)00111-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(24)00111-0</pub-id>
          <pub-id pub-id-type="medline">38906607</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(24)00111-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="web">
          <article-title>Article 9: Risk management system</article-title>
          <source>EU Artificial Intelligence Act</source>
          <access-date>2025-02-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://artificialintelligenceact.eu/article/9/">https://artificialintelligenceact.eu/article/9/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>A survey on hallucination in large language models: principles, taxonomy, challenges, and open questions</article-title>
          <source>ACM Trans Inf Syst</source>
          <year>2025</year>
          <volume>43</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1145/3703155</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Frieske</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ishii</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Madotto</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Survey of hallucination in natural language generation</article-title>
          <source>ACM Comput Surv</source>
          <year>2023</year>
          <volume>55</volume>
          <issue>12</issue>
          <fpage>1</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1145/3571730</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Asgari</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Montaña-Brown</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dubois</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Khalil</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Balloch</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pimenta</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A framework to assess clinical safety and hallucination rates of LLMs for medical text summarisation</article-title>
          <source>medRxiv</source>
          <year>2024</year>
          <pub-id pub-id-type="doi">10.1101/2024.09.12.24313556</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pfohl</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Cole-Lewis</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sayres</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Neal</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Asiedu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dieng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tomasev</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rashid</surname>
              <given-names>QM</given-names>
            </name>
            <name name-style="western">
              <surname>Azizi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rostamzadeh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Schaekermann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Walton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Parrish</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nagpal</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dewitt</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mansfield</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Prakash</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Heller</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikesalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Semturs</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Barral</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Matias</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Smith-Loud</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Horn</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Singhal</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A toolbox for surfacing health equity harms and biases in large language models</article-title>
          <source>Nat Med</source>
          <year>2024</year>
          <volume>30</volume>
          <issue>12</issue>
          <fpage>3590</fpage>
          <lpage>3600</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-03258-2</pub-id>
          <pub-id pub-id-type="medline">39313595</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-03258-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC11645264</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gallegos</surname>
              <given-names>IO</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Barrow</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tanjim</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dernoncourt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>NK</given-names>
            </name>
          </person-group>
          <article-title>Bias and fairness in large language models: a survey</article-title>
          <source>Comput Linguist</source>
          <year>2024</year>
          <volume>50</volume>
          <issue>3</issue>
          <fpage>1097</fpage>
          <lpage>1179</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2024.cl-3.8/"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/coli_a_00524</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zack</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Suzgun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Gichoya</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jurafsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Abdulnour</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Assessing the potential of GPT-4 to perpetuate racial and gender biases in health care: a model evaluation study</article-title>
          <source>Lancet Digit Health</source>
          <year>2024</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e12</fpage>
          <lpage>e22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(23)00225-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(23)00225-X</pub-id>
          <pub-id pub-id-type="medline">38123252</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(23)00225-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="web">
          <source>PhysioNet</source>
          <access-date>2025-04-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://physionet.org/news/post/gpt-responsible-use">https://physionet.org/news/post/gpt-responsible-use</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>JCL</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>William</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Chew</surname>
              <given-names>LST</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi-Velez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Savulescu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DSW</given-names>
            </name>
          </person-group>
          <article-title>Ethical and regulatory challenges of large language models in medicine</article-title>
          <source>Lancet Digit Health</source>
          <year>2024</year>
          <volume>6</volume>
          <issue>6</issue>
          <fpage>e428</fpage>
          <lpage>e432</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(24)00061-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(24)00061-X</pub-id>
          <pub-id pub-id-type="medline">38658283</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(24)00061-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>De-identifying medical patient data doesn't protect our privacy</article-title>
          <source>Stanford University Human-Centered Artificial Intelligence</source>
          <access-date>2025-04-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hai.stanford.edu/news/de-identifying-medical-patient-data-doesnt-protect-our-privacy">https://hai.stanford.edu/news/de-identifying-medical-patient-data-doesnt-protect-our-privacy</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khalid</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Qayyum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bilal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Fuqaha</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Qadir</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving artificial intelligence in healthcare: techniques and applications</article-title>
          <source>Comput Biol Med</source>
          <year>2023</year>
          <volume>158</volume>
          <fpage>106848</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0010-4825(23)00313-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2023.106848</pub-id>
          <pub-id pub-id-type="medline">37044052</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(23)00313-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref89">
        <label>89</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pollock</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>MDJ</given-names>
            </name>
            <name name-style="western">
              <surname>Khalil</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>McInerney</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Alexander</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tricco</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>de Moraes</surname>
              <given-names>ÉB</given-names>
            </name>
            <name name-style="western">
              <surname>Godfrey</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Pieper</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Saran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stern</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Munn</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Recommendations for the extraction, analysis, and presentation of results in scoping reviews</article-title>
          <source>JBI Evid Synth</source>
          <year>2023</year>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>520</fpage>
          <lpage>532</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pubmed.ncbi.nlm.nih.gov/36081365/"/>
          </comment>
          <pub-id pub-id-type="doi">10.11124/JBIES-22-00123</pub-id>
          <pub-id pub-id-type="medline">36081365</pub-id>
          <pub-id pub-id-type="pii">02174543-990000000-00076</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
