<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e54580</article-id>
      <article-id pub-id-type="pmid">38551633</article-id>
      <article-id pub-id-type="doi">10.2196/54580</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>An Entity Extraction Pipeline for Medical Text Records Using Large Language Models: Analytical Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Jin</surname>
            <given-names>Qiao</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Tannier</surname>
            <given-names>Xavier</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Torii</surname>
            <given-names>Manabu</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Lei</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0005-2757-1748</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ma</surname>
            <given-names>Yinyao</given-names>
          </name>
          <degrees>MS, MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2755-2169</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Bi</surname>
            <given-names>Wenshuai</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-3011-896X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Lv</surname>
            <given-names>Hanlin</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>BGI Research</institution>
            <addr-line>1-2F, Building 2, Wuhan Optics Valley International Biomedical Enterprise Accelerator Phase 3.1</addr-line>
            <addr-line>No 388 Gaoxin Road 2, Donghu New Technology Development Zone</addr-line>
            <addr-line>Wuhan, 430074</addr-line>
            <country>China</country>
            <phone>86 18707190886</phone>
            <email>lvhanlin@genomics.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1876-7846</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Yuxiang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1575-3692</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>BGI Research</institution>
        <addr-line>Wuhan</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Guangdong Bigdata Engineering Technology Research Center for Life Sciences</institution>
        <institution>BGI Research</institution>
        <addr-line>Shenzhen</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Obstetrics</institution>
        <institution>People’s Hospital of Guangxi Zhuang Autonomous Region</institution>
        <addr-line>Nanning</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hanlin Lv <email>lvhanlin@genomics.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>29</day>
        <month>3</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e54580</elocation-id>
      <history>
        <date date-type="received">
          <day>15</day>
          <month>11</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>1</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>23</day>
          <month>1</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>14</day>
          <month>2</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Lei Wang, Yinyao Ma, Wenshuai Bi, Hanlin Lv, Yuxiang Li. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 29.03.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e54580" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The study of disease progression relies on clinical data, including text data, and extracting valuable features from text data has been a research hot spot. With the rise of large language models (LLMs), semantic-based extraction pipelines are gaining acceptance in clinical research. However, the security and feature hallucination issues of LLMs require further attention.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to introduce a novel modular LLM pipeline, which could semantically extract features from textual patient admission records.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The pipeline was designed to process a systematic succession of concept extraction, aggregation, question generation, corpus extraction, and question-and-answer scale extraction, which was tested via 2 low-parameter LLMs: Qwen-14B-Chat (QWEN) and Baichuan2-13B-Chat (BAICHUAN). A data set of 25,709 pregnancy cases from the People’s Hospital of Guangxi Zhuang Autonomous Region, China, was used for evaluation with the help of a local expert’s annotation. The pipeline was evaluated with the metrics of accuracy and precision, null ratio, and time consumption. Additionally, we evaluated its performance via a quantized version of Qwen-14B-Chat on a consumer-grade GPU.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The pipeline demonstrates a high level of precision in feature extraction, as evidenced by the accuracy and precision results of Qwen-14B-Chat (95.52% and 92.93%, respectively) and Baichuan2-13B-Chat (95.86% and 90.08%, respectively). Furthermore, the pipeline exhibited low null ratios and variable time consumption. The INT4-quantized version of QWEN delivered an enhanced performance with 97.28% accuracy and a 0% null ratio.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The pipeline exhibited consistent performance across different LLMs and efficiently extracted clinical features from textual data. It also showed reliable performance on consumer-grade hardware. This approach offers a viable and effective solution for mining clinical research data from textual records.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>clinical data extraction</kwd>
        <kwd>large language models</kwd>
        <kwd>feature hallucination</kwd>
        <kwd>modular approach</kwd>
        <kwd>unstructured data processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Clinical text data have been widely recognized in data research due to their inclusion of multisource information [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>] (eg, patient subjective statements, past objective facts, doctors’ diagnostic processes, and summary records). Extracting useful information from text data could serve as a crucial supplement to the study of disease progression; it could complement objective indicators dependent on laboratory tests and examinations [<xref ref-type="bibr" rid="ref3">3</xref>], which has consistently been a hot research topic [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      <p>Historically, methods for text data extraction mainly include the following:</p>
      <list list-type="bullet">
        <list-item>
          <p>Manual annotation: scales are designed based on clinical and research experience, followed by manual field extraction [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        </list-item>
        <list-item>
          <p>Rule extraction: concepts from an established knowledge base, such as <italic>International Classification of Diseases, Tenth Revision</italic> [<xref ref-type="bibr" rid="ref9">9</xref>], are used for concept term extraction. This process is typically based on similarity algorithms and manual assistance to extract terms and their attributes (eg, negations and dependency relationships) [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
        </list-item>
        <list-item>
          <p>Named entity recognition or natural language processing algorithms: supervised learning methods, such as pretrained models like T5 [<xref ref-type="bibr" rid="ref11">11</xref>], Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref12">12</xref>], and BERT’s variants [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>], with manual annotation to enhance semantic comprehension capabilities [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        </list-item>
      </list>
      <p>The task of extracting features from vast unstructured text presents itself as a daunting, labor-intensive, and time-consuming endeavor [<xref ref-type="bibr" rid="ref18">18</xref>], for the following reasons:</p>
      <list list-type="bullet">
        <list-item>
          <p>It is challenging to determine the dimension of extracted features initially, and from another perspective, confining the feature dimension means constraining the research scope from the outset [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        </list-item>
        <list-item>
          <p>Given the inherent subjectivity and potential biases of recording subjects, solely relying on algorithms without annotation typically results in low accuracy and recall [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        </list-item>
        <list-item>
          <p>Achieving higher accuracy with a broader feature scope, and the required human effort involved, is typically nonlinear [<xref ref-type="bibr" rid="ref4">4</xref>], and the difficulty becomes apparent when confronted with massive real-world data.</p>
        </list-item>
      </list>
      <p>The advent of large language models (LLMs) has paved a new path for the dilemma in clinical text extraction [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. In the realm of natural language understanding research, generative large models, represented notably by ChatGPT [<xref ref-type="bibr" rid="ref24">24</xref>] since 2022, have achieved unimaginable capabilities in semantic dimensions, leveraging the emergent intelligence from vast parameter scales. However, there are numerous considerations and limitations in their application, as follows:</p>
      <list list-type="bullet">
        <list-item>
          <p>High-performing LLMs, such as OpenAI ChatGPT and Google Bard [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>], are currently not open source, and patient data need to be submitted to their platform for analysis, presenting security challenges [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>].</p>
        </list-item>
        <list-item>
          <p>Open-source LLMs with high intelligence generally require a large number of parameters (10-100 billion), which are hard to support on consumer-level graphics processing units (GPUs) [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        </list-item>
        <list-item>
          <p>Low-parameter (around 10 billion) LLMs typically require multistrategy support when dealing with tasks in certain vertical segments [<xref ref-type="bibr" rid="ref30">30</xref>] (eg, fine-tuning, knowledge base or knowledge graph support, complex Chain of Thought (CoT) [<xref ref-type="bibr" rid="ref31">31</xref>] along with its derivatives, and even global training) and are accompanied by various anomaly issues, including feature hallucination.</p>
        </list-item>
      </list>
      <p>Although the application of LLMs faces various potential limitations and challenges, as mentioned above, the foundational entity extraction and understanding capabilities of LLMs can still be used for low-cost extraction of clinical text data through meticulous prompt design, guidance combining CoT, and standardized examples [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref32">32</xref>].</p>
      <p>In this study, we aimed to extract valuable features from a series of given patient admission records, which include the chief complaint and the medical histories. In light of this task, we introduced a modular LLM approach, which divides the entire extraction path into several smaller steps, with each modular LLM handling these basic steps automatically. We adopted the core idea of LLM agents [<xref ref-type="bibr" rid="ref30">30</xref>] and self-consistency with CoT [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
      <p>To experiment with this approach, we implemented 2 low-parameter LLMs in a local environment and compared their performances within a retrospective cohort of pregnancy to provide a reference that future researchers might draw upon.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Preparation</title>
        <sec>
          <title>Data Sources</title>
          <p>In this study, the text corpus was compiled from two primary sources:</p>
          <list list-type="order">
            <list-item>
              <p>Chief complaints and medical histories, exemplified in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, were extracted from inpatient admission records of an established cohort at the People’s Hospital of Guangxi Zhuang Autonomous Region in China. The established cohort for the preeclampsia risk study consisted of 25,709 pregnancies that received prenatal care between the 11th and 13th weeks of gestation from April 2012 to September 2021.</p>
            </list-item>
            <list-item>
              <p>Clinical practice guidelines consisted of the 2018 guidelines from the American College of Obstetricians and Gynecologists [<xref ref-type="bibr" rid="ref34">34</xref>] and the 2019 guidelines from the National Institute for Health and Care Excellence [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
            </list-item>
          </list>
          <p>To ensure linguistic consistency, the entire corpus was maintained in Chinese.</p>
        </sec>
        <sec>
          <title>Model Deployment</title>
          <p>We independently deployed 2 of the most exemplary LLMs in the Chinese domain as of September 2023 in a secure intranet environment: Qwen-14B-Chat (QWEN) [<xref ref-type="bibr" rid="ref36">36</xref>] and Baichuan2-13B-Chat (BAICHUAN) [<xref ref-type="bibr" rid="ref37">37</xref>]. In the environment, the server cluster used NVIDIA DGX-A100 (2×40 GB) GPU nodes. The QWEN used 29 GB of storage and 27 GB of GPU memory, while the BAICHUAN used 26 GB of storage and 28.9 GB of GPU memory. Both models operated solely on physically isolated GPUs, and access was facilitated through the OpenAI [<xref ref-type="bibr" rid="ref38">38</xref>] format and FastChat [<xref ref-type="bibr" rid="ref39">39</xref>]. The LLMs were built upon PyTorch 2.0, with the temperature set to 0 and max_token adjusted task by task.</p>
        </sec>
        <sec>
          <title>Experimental Path</title>
          <p>In this study, we have introduced an approach that autonomously extracts valuable textual features. Diverging from traditional LLM applications, we used an “external-COT” strategy, dividing the process into several controllable steps, as illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
          <p>The extraction approach could be divided into four parts: (1) concept preparation, that is, extracting existing concepts from the corpus and selecting concerning concepts; (2) corpus preparation, that is, deidentifying raw data and preparing the corpus in accordance with the selected concept; (3) prompt design for different LLM tasks; (4) question-and-answer (Q&amp;A) scales, that is, transforming concepts into question templates and extracting corresponding scales by LLMs.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Flowchart of the extraction approach. LLM: large language model; Q&amp;A: question and answer; SNOMED CT: Systematised Nomenclature of Medicine Clinical Terms.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e54580_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Prompt Design</title>
          <p>The design of prompt templates is fundamental to efficient and accurate extraction. Prior to processing the entire data set, an initial evaluation was conducted on 100 observations to assess the effectiveness of the templates, allowing for continuous refinement of prompt strategies and orientations. An appropriate template was defined based on the following criteria: (1) absence of redundant content generation, (2) consistent and uniform efficiency, and (3) infrequent occurrence of feature hallucination.</p>
          <p>We adopted a 4-paragraph structure, referring to the prompt engineering suggestions of QWEN and BAICHUAN, as follows:</p>
          <list list-type="order">
            <list-item>
              <p>Context section: defines the role and task, provides a basic understanding, and establishes a behavioral baseline for the model.</p>
            </list-item>
            <list-item>
              <p>Instruction section: outlines the execution steps, uses the CoT methodology, and provides examples to ensure guided model operation.</p>
            </list-item>
            <list-item>
              <p>Input data section: manages various inputs to meet diverse information needs.</p>
            </list-item>
            <list-item>
              <p>Output indicator section: specifies the output format and standards, setting clear expectations for the output.</p>
            </list-item>
          </list>
          <p>To avoid input bias, the prompt templates for QWEN and BAICHUAN were maintained without any modifications. In addition, we conducted experiments using 100 observations at different levels of concurrency to select the optimal configuration.</p>
        </sec>
        <sec>
          <title>Concept Extraction and Aggregation</title>
          <p>We initially extracted all discernible concepts from chief complaints and medical histories using LLMs with a designed prompt 1, and concepts were retained only if their frequency of occurrence exceeded 5%. To reduce potential attention bias and expand the range of identified concepts, we also included concepts from clinical practice guidelines related to preeclampsia, particularly the American College of Obstetricians and Gynecologists 2018 guidelines and the National Institute for Health and Care Excellence 2019 guidelines.</p>
          <p>As we defined in prompt 1, the extracted concepts were formatted using the Systematised Nomenclature of Medicine Clinical Terms (SNOMED CT) vocabulary within the Clinical Findings and Observations domain [<xref ref-type="bibr" rid="ref40">40</xref>].</p>
          <p>To mitigate potential output errors from LLMs, such as concepts not belonging to the Clinical Findings and Observations domain, or even errors outside of the SNOMED CT vocabulary, we implemented a rule-based matching approach to filter extraction inaccuracies.</p>
          <p>Furthermore, in this research, we aimed to extract concepts with diverse semantic expressions (including diagnoses, various medical histories, symptoms, observations, interventions, and types of examination). To accomplish this, local experts manually filtered out concepts embedded in structured text, such as dates or numbers.</p>
        </sec>
        <sec>
          <title>Question Generation</title>
          <p>After the extraction and aggregation of concepts, they were transformed into specific questions by LLMs as question templates for subsequent data extraction. In this section, we leveraged ChatGPT4.0 as a question generator to produce a basic set of questions, which were then refined by local experts for specificity based on its performance across 100 observations.</p>
        </sec>
        <sec>
          <title>Q&amp;A Scale Extraction</title>
          <p>To avoid contextual and temporal event confusion leading to incorrect responses (eg, confusing current medical history with a past medical history or confusing the patient’s medical history with that of family members), we preextracted the corpus using two strategies: (1) based on the position of the question templates and (2) based on the sentence containing the concepts. The extracted corpus was then labeled with the corresponding question templates for the subsequent extraction of Q&amp;A scales.</p>
          <p>The refined corpus, combined with corresponding question templates, guided a systematic extraction process with 2 LLMs, forming Q&amp;A scales for further application.</p>
          <p>Each question probed the LLMs, and the extracted sentences formed the basis of the generated responses. This approach enabled a logical mapping of questions to relevant text, ultimately improving the accuracy and efficiency of feature extraction.</p>
        </sec>
        <sec>
          <title>Evaluation</title>
          <p>Given the practical constraints and the objective of minimizing manual intervention, it was unfeasible to validate all answer scales individually across a Q&amp;A space containing 68 questions and 25,709 observations. Therefore, a 3-fold assessment strategy was developed, as explained in the sections that follow.</p>
        </sec>
        <sec>
          <title>Accuracy and Precision</title>
          <p>A subset of 1500 observations chosen at random was manually annotated in collaboration with local experts, serving as the gold standard. The precision of positive identifications by both LLMs was assessed against a specified benchmark.</p>
        </sec>
        <sec>
          <title>Null Ratio</title>
          <p>The null ratio of both LLMs was independently measured across all 25,709 observations. Empty or meaningless outputs (symbols and gibberish) were identified as null outputs, and the null ratio was then calculated as the proportion of such responses to the total.</p>
        </sec>
        <sec>
          <title>Time Consumption</title>
          <p>The efficiency of the extraction process was evaluated by measuring the time taken by the 2 LLMs to respond to the questions across all 25,709 observations.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study was approved by the People’s Hospital of the Guangxi Zhuang Autonomous Region in China (KT-KJT-2021-67). The requirement for informed consent was waived, due to the retrospective nature of the study, and all clinical data were deidentified and anonymized.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Path Decomposition Overview</title>
        <sec>
          <title>Prompt Template</title>
          <p>Through trials with the prompt template on 100 observations, we selected the template that demonstrated optimal consistency, as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Prompt templates of information extraction and question-and-answer scales. A 4-paragraph structure was implemented for a prompt design using a few-shot Chain-of-Thought prompting. The original version, written in Chinese, was translated into English. (A) Extracting concepts and terms; (B) scaling questions. As both deployed large language models were pretrained with the <italic>International Classification of Diseases, Tenth Revision</italic> corpus, they could directly engage in concept extraction.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e54580_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Merged Concepts</title>
          <p>After merging all concepts, we filtered out those that appeared less than 5% of the time. A total of 117 concepts and terms are listed in Table S1 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        </sec>
        <sec>
          <title>Question Template</title>
          <p>We then selected 68 concepts and transformed them into question formats for the subsequent extraction of Q&amp;A scales. The detailed questions and their corresponding concepts are listed in Table S2 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        </sec>
        <sec>
          <title>Scale Extraction</title>
          <p>We identified that the optimal performance in Q&amp;A scale extraction occurs with a concurrency of 3 requests, enhancing speed by 17.9% compared to a single request. Furthermore, we used a max_token restriction strategy, capping it at 20, to optimize inference speed.</p>
          <p>Ultimately, within the 2D Q&amp;A space formed by the answer scales, there were a total of 68 question columns and 25,709 observations (listed in <xref ref-type="table" rid="table1">Table 1</xref>).</p>
          <p>We used accuracy and precision metrics to assess the correctness of the LLMs' outputs across 1500 observations. Furthermore, we used 2 parameters—null ratio and time consumption—in 25,709 observations to evaluate the consistency and efficiency of the 2 LLMs, respectively.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Question-and-answer scales for Qwen-14B-Chat (QWEN) and Baichuan2-13B-Chat (BAICHUAN).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="350"/>
              <col width="110"/>
              <col width="110"/>
              <col width="110"/>
              <col width="110"/>
              <col width="110"/>
              <col width="100"/>
              <thead>
                <tr valign="top">
                  <td>Concepts</td>
                  <td colspan="3">QWEN</td>
                  <td colspan="3">BAICHUAN</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Positive ratio (%)</td>
                  <td>Negative ratio (%)</td>
                  <td>Null ratio (%)</td>
                  <td>Positive ratio (%)</td>
                  <td>Negative ratio (%)</td>
                  <td>Null ratio (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Menstrual color</td>
                  <td>—<sup>a</sup></td>
                  <td>—</td>
                  <td>0.00</td>
                  <td>—</td>
                  <td>—</td>
                  <td>0.67</td>
                </tr>
                <tr valign="top">
                  <td>Menstrual flow</td>
                  <td>—</td>
                  <td>—</td>
                  <td>0.00</td>
                  <td>—</td>
                  <td>—</td>
                  <td>0.80</td>
                </tr>
                <tr valign="top">
                  <td>Pregnancy weight gain<sup>b</sup></td>
                  <td>—</td>
                  <td>—</td>
                  <td>1.60</td>
                  <td>—</td>
                  <td>—</td>
                  <td>2.53</td>
                </tr>
                <tr valign="top">
                  <td>Abdominal bloating</td>
                  <td>26.27</td>
                  <td>73.73</td>
                  <td>0.00</td>
                  <td>38.40</td>
                  <td>61.60</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Abdominal pain</td>
                  <td>49.00</td>
                  <td>51.00</td>
                  <td>0.00</td>
                  <td>43.13</td>
                  <td>52.00</td>
                  <td>4.87</td>
                </tr>
                <tr valign="top">
                  <td>Amniocentesis</td>
                  <td>1.00</td>
                  <td>99.00</td>
                  <td>0.00</td>
                  <td>0.73</td>
                  <td>98.87</td>
                  <td>0.40</td>
                </tr>
                <tr valign="top">
                  <td>Aspirin use</td>
                  <td>1.40</td>
                  <td>98.60</td>
                  <td>0.00</td>
                  <td>1.40</td>
                  <td>98.60</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Bilateral adnexal masses</td>
                  <td>4.87</td>
                  <td>95.13</td>
                  <td>0.00</td>
                  <td>0.73</td>
                  <td>99.20</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Bilateral lower limb edema</td>
                  <td>51.53</td>
                  <td>48.47</td>
                  <td>0.00</td>
                  <td>4.87</td>
                  <td>95.13</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Blood glucose screening</td>
                  <td>17.87</td>
                  <td>82.13</td>
                  <td>0.00</td>
                  <td>28.13</td>
                  <td>71.40</td>
                  <td>0.47</td>
                </tr>
                <tr valign="top">
                  <td>Cervical secretions</td>
                  <td>3.53</td>
                  <td>96.47</td>
                  <td>0.00</td>
                  <td>3.67</td>
                  <td>96.33</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Chest tightness</td>
                  <td>2.87</td>
                  <td>97.13</td>
                  <td>0.00</td>
                  <td>4.40</td>
                  <td>95.53</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Cold or flu</td>
                  <td>1.87</td>
                  <td>98.13</td>
                  <td>0.00</td>
                  <td>3.07</td>
                  <td>96.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Convulsions</td>
                  <td>2.27</td>
                  <td>97.73</td>
                  <td>0.00</td>
                  <td>0.33</td>
                  <td>99.67</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Dizziness</td>
                  <td>7.13</td>
                  <td>92.87</td>
                  <td>0.00</td>
                  <td>1.87</td>
                  <td>98.13</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Drinking</td>
                  <td>0.00</td>
                  <td>100.00</td>
                  <td>0.00</td>
                  <td>0.00</td>
                  <td>100.00</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Early pregnancy reaction or symptoms</td>
                  <td>85.93</td>
                  <td>14.07</td>
                  <td>0.00</td>
                  <td>47.53</td>
                  <td>52.13</td>
                  <td>0.33</td>
                </tr>
                <tr valign="top">
                  <td>Family history (asthma)</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                  <td>1.93</td>
                  <td>98.07</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Family history (autoimmune disease)</td>
                  <td>0.20</td>
                  <td>99.80</td>
                  <td>0.00</td>
                  <td>3.07</td>
                  <td>96.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Family history (diabetes mellitus)</td>
                  <td>1.67</td>
                  <td>98.33</td>
                  <td>0.00</td>
                  <td>2.33</td>
                  <td>97.67</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Family history (drug allergy)</td>
                  <td>0.01</td>
                  <td>99.99</td>
                  <td>0.00</td>
                  <td>5.67</td>
                  <td>94.33</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Family history (heart disease)</td>
                  <td>1.60</td>
                  <td>98.40</td>
                  <td>0.00</td>
                  <td>2.27</td>
                  <td>97.73</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Family history (hematologic disease)</td>
                  <td>0.13</td>
                  <td>99.87</td>
                  <td>0.00</td>
                  <td>0.73</td>
                  <td>98.87</td>
                  <td>0.40</td>
                </tr>
                <tr valign="top">
                  <td>Family history (hypertension)</td>
                  <td>3.87</td>
                  <td>96.13</td>
                  <td>0.00</td>
                  <td>3.93</td>
                  <td>96.07</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Family history (kidney disease)</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                  <td>1.40</td>
                  <td>98.53</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Family history (mental illness)</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                  <td>0.47</td>
                  <td>99.47</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Family history (neurological disease)</td>
                  <td>0.47</td>
                  <td>99.53</td>
                  <td>0.00</td>
                  <td>0.73</td>
                  <td>98.80</td>
                  <td>0.47</td>
                </tr>
                <tr valign="top">
                  <td>Family history (preeclampsia)</td>
                  <td>0.12</td>
                  <td>99.88</td>
                  <td>0.00</td>
                  <td>2.60</td>
                  <td>97.40</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Family history (rheumatic disease)</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                  <td>1.47</td>
                  <td>98.53</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Fetal paternal drinking history</td>
                  <td>0.00</td>
                  <td>100.00</td>
                  <td>0.00</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Fetal paternal history of genetic diseases</td>
                  <td>1.00</td>
                  <td>99.00</td>
                  <td>0.00</td>
                  <td>0.87</td>
                  <td>99.07</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Fetal paternal smoking history</td>
                  <td>0.00</td>
                  <td>100.00</td>
                  <td>0.00</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Fever</td>
                  <td>9.67</td>
                  <td>90.33</td>
                  <td>0.00</td>
                  <td>1.93</td>
                  <td>98.07</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>G6PD<sup>c</sup></td>
                  <td>3.33</td>
                  <td>96.67</td>
                  <td>0.00</td>
                  <td>2.53</td>
                  <td>97.47</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Headache</td>
                  <td>2.80</td>
                  <td>97.20</td>
                  <td>0.00</td>
                  <td>0.80</td>
                  <td>99.13</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Insomnia</td>
                  <td>0.60</td>
                  <td>99.40</td>
                  <td>0.00</td>
                  <td>1.20</td>
                  <td>98.47</td>
                  <td>0.33</td>
                </tr>
                <tr valign="top">
                  <td>Mediterranean anemia screening</td>
                  <td>8.27</td>
                  <td>91.73</td>
                  <td>0.00</td>
                  <td>17.60</td>
                  <td>82.27</td>
                  <td>0.13</td>
                </tr>
                <tr valign="top">
                  <td>Palpitations</td>
                  <td>1.53</td>
                  <td>98.47</td>
                  <td>0.00</td>
                  <td>3.07</td>
                  <td>96.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (antiphospholipid syndrome)</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (chronic kidney disease)</td>
                  <td>0.80</td>
                  <td>99.20</td>
                  <td>0.00</td>
                  <td>1.07</td>
                  <td>98.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (diabetes mellitus)</td>
                  <td>0.60</td>
                  <td>99.40</td>
                  <td>0.00</td>
                  <td>0.13</td>
                  <td>99.87</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (drug allergy)</td>
                  <td>10.53</td>
                  <td>89.47</td>
                  <td>0.00</td>
                  <td>39.80</td>
                  <td>60.20</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (dysmenorrhea)</td>
                  <td>24.40</td>
                  <td>75.60</td>
                  <td>0.00</td>
                  <td>21.20</td>
                  <td>78.80</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (food allergy)</td>
                  <td>5.13</td>
                  <td>94.87</td>
                  <td>0.00</td>
                  <td>8.07</td>
                  <td>91.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (heart disease)</td>
                  <td>1.67</td>
                  <td>98.33</td>
                  <td>0.00</td>
                  <td>0.47</td>
                  <td>99.53</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (hematologic disease)</td>
                  <td>0.00</td>
                  <td>100.00</td>
                  <td>0.00</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (hypertension)</td>
                  <td>7.40</td>
                  <td>92.60</td>
                  <td>0.00</td>
                  <td>0.93</td>
                  <td>99.07</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (infectious disease)</td>
                  <td>1.93</td>
                  <td>98.07</td>
                  <td>0.00</td>
                  <td>3.80</td>
                  <td>96.20</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (preeclampsia)</td>
                  <td>0.93</td>
                  <td>99.07</td>
                  <td>0.00</td>
                  <td>0.87</td>
                  <td>99.13</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (surgery history)</td>
                  <td>35.67</td>
                  <td>64.33</td>
                  <td>0.00</td>
                  <td>36.27</td>
                  <td>63.67</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (systemic lupus erythematosus)</td>
                  <td>0.20</td>
                  <td>99.80</td>
                  <td>0.00</td>
                  <td>0.20</td>
                  <td>99.80</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (thalassemia)</td>
                  <td>1.00</td>
                  <td>99.00</td>
                  <td>0.00</td>
                  <td>0.80</td>
                  <td>99.13</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (trauma history)</td>
                  <td>8.80</td>
                  <td>91.20</td>
                  <td>0.00</td>
                  <td>2.87</td>
                  <td>97.13</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Personal history (viral hepatitis)</td>
                  <td>6.07</td>
                  <td>93.93</td>
                  <td>0.00</td>
                  <td>6.20</td>
                  <td>93.80</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Poor pregnancy history (induced abortion)</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                  <td>0.13</td>
                  <td>99.87</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Poor pregnancy history (miscarriage)</td>
                  <td>0.47</td>
                  <td>99.53</td>
                  <td>0.00</td>
                  <td>0.47</td>
                  <td>99.53</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Poor pregnancy history (premature birth)</td>
                  <td>0.27</td>
                  <td>99.73</td>
                  <td>0.00</td>
                  <td>0.27</td>
                  <td>99.73</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Prenatal screening</td>
                  <td>31.13</td>
                  <td>68.87</td>
                  <td>0.00</td>
                  <td>15.87</td>
                  <td>83.40</td>
                  <td>0.73</td>
                </tr>
                <tr valign="top">
                  <td>Regular prenatal check-ups</td>
                  <td>96.20</td>
                  <td>3.80</td>
                  <td>0.00</td>
                  <td>96.80</td>
                  <td>3.13</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Smoking</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                  <td>0.07</td>
                  <td>99.93</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Threatened abortion</td>
                  <td>6.20</td>
                  <td>93.80</td>
                  <td>0.00</td>
                  <td>5.80</td>
                  <td>94.00</td>
                  <td>0.20</td>
                </tr>
                <tr valign="top">
                  <td>Umbilical cord blood ratio</td>
                  <td>0.00</td>
                  <td>100.00</td>
                  <td>0.00</td>
                  <td>0.00</td>
                  <td>100.00</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Use of antihypertensive drugs</td>
                  <td>1.73</td>
                  <td>98.27</td>
                  <td>0.00</td>
                  <td>2.13</td>
                  <td>97.87</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Use of progestogen drugs</td>
                  <td>13.47</td>
                  <td>86.53</td>
                  <td>0.00</td>
                  <td>14.40</td>
                  <td>85.53</td>
                  <td>0.07</td>
                </tr>
                <tr valign="top">
                  <td>Vaginal bleeding</td>
                  <td>81.07</td>
                  <td>18.93</td>
                  <td>0.00</td>
                  <td>22.60</td>
                  <td>77.27</td>
                  <td>0.13</td>
                </tr>
                <tr valign="top">
                  <td>Vaginal discharge</td>
                  <td>33.00</td>
                  <td>67.00</td>
                  <td>0.00</td>
                  <td>48.47</td>
                  <td>51.53</td>
                  <td>0.00</td>
                </tr>
                <tr valign="top">
                  <td>Vaginal infection</td>
                  <td>25.27</td>
                  <td>74.73</td>
                  <td>0.00</td>
                  <td>16.20</td>
                  <td>82.73</td>
                  <td>1.07</td>
                </tr>
                <tr valign="top">
                  <td>Vaginal secretions</td>
                  <td>16.60</td>
                  <td>83.40</td>
                  <td>0.00</td>
                  <td>16.73</td>
                  <td>83.27</td>
                  <td>0.00</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>Not applicable.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>The mean pregnancy weight gain was 13.73 (SD 24.12) for QWEN and 13.75 (SD 31.28) for BAICHUAN.</p>
              </fn>
              <fn id="table1fn3">
                <p><sup>c</sup>G6PD: glucose-6-phosphate dehydrogenase.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <sec>
          <title>Accuracy and Precision</title>
          <p><xref rid="figure3" ref-type="fig">Figure 3</xref>A and 3B and Figure S1 (parts A and B) in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> illustrate the Q&amp;A space for a sample chunk extracted by QWEN and BAICHUAN in comparison with manual annotation. The figures demonstrate the exceptional accuracy and precision of QWEN and BAICHUAN. QWEN attained an average accuracy of 95.52% and an average precision of 92.93%, whereas BAICHUAN displayed an average accuracy of 95.86% and an average precision of 90.08%. These figures clearly indicate that the 2 LLMs have more concentrated errors in specific concepts, and overall, they achieve high levels of precision in most extractions.</p>
          <p>LLMs demonstrated consistent performance across most questions and excelled in binary, well-defined medical history questions, often reaching 100% accuracy and precision. However, the accuracy performance varied significantly when dealing with questions that involved semantic ambiguities or definitional uncertainties. This inconsistency might be tied to the LLM’s training and inference alignment. Notable disparities were observed in questions pertaining to menstrual color (QWEN: 1000/1500, 66.7%; BAICHUAN: 1097/1500, 73.1%), early pregnancy symptoms (QWEN: 909/1500, 60.7%; BAICHUAN: 1474/1500, 98.3%), vaginal bleeding (QWEN: 593/1500, 39.5%; BAICHUAN: 1455/1500, 97%), bilateral lower limb edema (QWEN: 786/1500, 52.4%; BAICHUAN: 1486/1500, 99.7%), and menstrual flow (QWEN: 1498/1500, 99.8%; BAICHUAN: 605/1500, 40.3%).</p>
          <p>Apart from the above, the inconsistent precision of some concepts could be attributed to their low true positive rate, such as insomnia (QWEN: 3/17, 17.7%; BAICHUAN: 8/17, 47.1%), personal history—antiphospholipid syndrome (QWEN: 0/2, 0%; BAICHUAN: 1/2, 50%), and poor pregnancy history—induced abortion (QWEN: 1/2, 50%; BAICHUAN: 2/2, 100%). The exact precision is listed in Table S3 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Accuracy and precision in the question-and-answer space. With the local expert’s annotation of 1500 observations, parts A and B showcase a comparison of the accuracy and precision of QWEN, BAICHUAN, and QWEN(INT4) across various concepts. Our findings reveal that the performance trends of large language models are nearly uniform across different concepts in terms of accuracy while showing a discernible variation in precision. G6PD: glucose-6-phosphate dehydrogenase.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e54580_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Null Ratio</title>
          <p>As depicted in <xref ref-type="table" rid="table1">Table 1</xref>, both LLMs demonstrated superior performance with minimal null ratios. Specifically, QWEN (Figure S1A in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) exhibited a mean null ratio of 0.02%, in contrast to BAICHUAN (Figure S1B in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>), which recorded a slightly higher null ratio of 0.21%. Failure of QWEN extraction was only in pregnancy weight gain (411/25,709, 1.60%), but failures of BAICHUAN extraction were mainly in symptoms (abdominal pain: 1252/25,707, 4.87%; vaginal infection: 275/25,709, 1.07%).</p>
        </sec>
        <sec>
          <title>Time Consumption</title>
          <p>We conducted a comparative analysis of the time performance between QWEN and BAICHUAN on various Q&amp;A scales, discovering that BAICHUAN consistently exhibits higher time consumption across almost all scales, reaching up to 4 times that of QWEN, as illustrated in <xref rid="figure4" ref-type="fig">Figure 4</xref>B.</p>
          <p><xref rid="figure4" ref-type="fig">Figure 4</xref>A compares the time consumption of LLMs in extracting diverse concepts. Although there were significant differences across different concepts, overall, the LLMs demonstrated a consistent performance across these concepts. For queries with clear definitions and concise corpora, such as drug usage and previous pregnancy history, the time consumed was minimal. In the category of medical history, both models exhibited uniform and stable performances (QWEN and BAICHUAN both revealed a time consumption ratio of 1:3).</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Time consumption of question-and-answer (Q&amp;A) scales for QWEN and BAICHUAN, measured in seconds. (A) Comparative distribution of time consumption for QWEN and BAICHUAN per concept, and QWEN exhibited notably lower extraction times across various Q&amp;A scales compared to BAICHUAN. (B) Comparative distribution of the time consumption for 2 large language models per observation. G6PD: glucose-6-phosphate dehydrogenase.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e54580_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Additional Research</title>
        <p>In clinical practice, to address scenarios of resource constraints, we used a quantized version of the LLM in our study to validate the applicability of this approach. We used an official-release INT4 version of QWEN, QWEN(INT4). The model was deployed on an NVIDIA RTX 3090 GPU (24 GB).</p>
        <p>With the same approach listed above, QWEN(INT4) achieved even better performance. <xref rid="figure3" ref-type="fig">Figure 3</xref> and Figure S1C in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> demonstrate that the average accuracy of QWEN(INT4) is 97.28%, accompanied by a null ratio of 0%.</p>
        <p>Despite a notable correlation in extraction performance between QWEN(INT4) and QWEN, QWEN(INT4) demonstrated superior efficiency on limited hardware, with an average of 31 seconds per observation, compared to 47 seconds for QWEN and 312 seconds for BAICHUAN.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, the extracted scales incorporated not only the conventional features of interest but also less frequently mentioned dimensions in previous cohorts or guidelines. These included food and drug allergies (6.6% for food allergy and 25.2% for drug allergy), certain pregnancy symptoms (average positive ratio of 0.9% for insomnia and 2.3% for palpitations), menstrual conditions (22.8% for dysmenorrhea), medical history (1% for asthma family history and 0.27% for mental illness family history), and gestational intervention (13.93% for progestogen and 1.4% for aspirin).</p>
        <p>In this naturally recruited pregnancy cohort, the extracted features show proportions or trends comparable to those of similar studies, such as systemic lupus erythematosus (average positive ratio of 0.20% vs 0.03%-0.23% of similar cohorts [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]) and antiphospholipid syndrome (average positive ratio of 0.08% vs 0.02%-0.12% of similar cohorts [<xref ref-type="bibr" rid="ref43">43</xref>]), thereby corroborating the accuracy of our approach.</p>
        <p>Additionally, certain scale deviations were revealed compared to similar studies, notably in fetal paternal smoking history (average positive ratio of 0.04% vs approximately 28.1%-40% in similar studies [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]). Although these deviations were few, we conducted a sample retracing to the original texts and determined that the extraction approach was not at fault and accurately reflected the original data. This discrepancy highlights persistent concerns [<xref ref-type="bibr" rid="ref46">46</xref>] regarding the data quality in inpatient documentation, originating from patient self-reports and physician documentation, and vulnerability to recall and inquiry bias. Documentation varies among patients, influenced not only by patient conditions but also by physicians’ writing habits. Thus, we regard our approach as a preexperimental data analysis. Despite the presence of biases or missing dimensions, the approach uncovers several dimensions absent in structured medical texts, and valuable insights could still be extracted from the data with appropriate statistics [<xref ref-type="bibr" rid="ref47">47</xref>]. In clinical practice, preliminary interviews with documenting physicians are recommended prior to the selection of concepts to enhance data quality and mitigate potential biases.</p>
        <p>In the context of the extraction process, even when deployed solely on a standard consumer-grade GPU (NVIDIA RTX 3090), QWEN(INT4) completed the extraction of 25,709 observations and 68 features within 15 calendar days, averaging 48.9 seconds per observation. In practical applications, deploying 2 instances of QWEN(INT4) on a single graphics card, coupled with an additional deployment on a CPU [<xref ref-type="bibr" rid="ref36">36</xref>], is hypothesized to reduce the extraction time to approximately 7 days. Furthermore, multi-GPU server clusters, prevalent in clinical environments, could markedly reduce processing times, potentially to the scale of hours.</p>
        <p>In our study, we experimented with omitting the corpus extraction step, directly using the long text of each observation’s chief complaints and medical histories as raw data for Q&amp;A scale extraction. However, the experiment yielded poor performance in accuracy, precision, and time consumption, as illustrated in <xref rid="figure5" ref-type="fig">Figure 5</xref> and Figure S2 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        <p>These limitations appear significantly correlated with the current technological constraints of LLMs [<xref ref-type="bibr" rid="ref32">32</xref>], which tend to generate “feature hallucinations” more frequently when processing extensive texts [<xref ref-type="bibr" rid="ref48">48</xref>], leading to the loss of critical information. We believe that this issue will be resolved as the technology continues to evolve [<xref ref-type="bibr" rid="ref49">49</xref>].</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Approach performance when omitting corpus extraction step. (A) The average time consumed per question-and-answer (Q&amp;A) interaction over 300 observations for both models. (B) Comparing the distribution of time consumption for QWEN and BAICHUAN in a single observation per Q&amp;A scale. G6PD: glucose-6-phosphate dehydrogenase.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e54580_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>In our experimental validation, we selected a limited set of concepts, comprising only 68 items, to balance the consideration of time constraints. Despite our efforts to encompass a broad scope, some dimensions inevitably remain unaddressed, which is a limitation in verifying efficiency and accuracy across all dimensions.</p>
        <p>Furthermore, the raw data in this study was sourced exclusively from a single hospital, spanning nearly a decade. This duration, while significant, introduces limitations in the generalizability of our approach.</p>
        <p>Additionally, the approach used only 2 LLMs. Although we anticipate that future LLMs will be compatible with the current approach, this assumption necessitates further experimental validation.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Our proposed approach offers a potential methodology for clinical text data analysis. It involves extracting and summarizing concepts from the comprehensive text of a defined population, thus selecting research directions of interest, and eventually generating analyzable features for the cohort. This approach demonstrates notable precision and could provide substantial data support for future research endeavors.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>A sample of chief complaints and medical histories.</p>
        <media xlink:href="jmir_v26i1e54580_app1.docx" xlink:title="DOCX File , 18 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Additional statistics.</p>
        <media xlink:href="jmir_v26i1e54580_app2.docx" xlink:title="DOCX File , 873 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CoT</term>
          <def>
            <p>Chain of Thought</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">GPU</term>
          <def>
            <p>graphics processing unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">Q&amp;A</term>
          <def>
            <p>question and answer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">SNOMED CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine Clinical Terms</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We are grateful to Yinyao Ma and the clinical team for their exceptional contributions to this project, and we thank China National GeneBank for providing technical support.</p>
      <p>This work was supported by the Guangxi Key Research and Development Program (AB22035056).</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and analyzed during this study are not publicly available due to privacy or ethical restrictions but are available on request from the corresponding author.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>LW and YM contributed equally to this study. LW, HL, and YM participated in the study design and drafted the manuscript. YL and HL participated in data collection and outcome rule review. LW and WB performed the statistical analysis and established machine learning models. YL helped to draft the manuscript. All authors read and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tayefi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ngo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chomutare</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dalianis</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salvi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Budrionis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Godtliebsen</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Challenges and opportunities beyond structured data in analysis of electronic health records</article-title>
          <source>WIREs Computational Stats</source>
          <year>2021</year>
          <month>02</month>
          <day>14</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>1</fpage>
          <lpage>19</lpage>
          <pub-id pub-id-type="doi">10.1002/wics.1549</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Data processing and text mining technologies on electronic medical records: a review</article-title>
          <source>J Healthc Eng</source>
          <year>2018</year>
          <volume>2018</volume>
          <fpage>4302425</fpage>
          <pub-id pub-id-type="doi">10.1155/2018/4302425</pub-id>
          <pub-id pub-id-type="medline">29849998</pub-id>
          <pub-id pub-id-type="pmcid">PMC5911323</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Varshini</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Uthra</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>An approach to extract meaningful data from unstructured clinical notes</article-title>
          <source>Inventive Systems and Control</source>
          <year>2021</year>
          <month>06</month>
          <day>08</day>
          <publisher-loc>Singapore</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>581</fpage>
          <lpage>590</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Clinical text data in machine learning: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>03</month>
          <day>31</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>e17984</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/3/e17984/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17984</pub-id>
          <pub-id pub-id-type="medline">32229465</pub-id>
          <pub-id pub-id-type="pii">v8i3e17984</pub-id>
          <pub-id pub-id-type="pmcid">PMC7157505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Koh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>ALP</given-names>
            </name>
          </person-group>
          <article-title>Keyword extraction and structuralization of medical reports</article-title>
          <source>Health Inf Sci Syst</source>
          <year>2020</year>
          <month>12</month>
          <day>03</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32269770"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13755-020-00108-6</pub-id>
          <pub-id pub-id-type="medline">32269770</pub-id>
          <pub-id pub-id-type="pii">108</pub-id>
          <pub-id pub-id-type="pmcid">PMC7125292</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vassar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Holzmann</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The retrospective chart review: important methodological considerations</article-title>
          <source>J Educ Eval Health Prof</source>
          <year>2013</year>
          <month>11</month>
          <day>30</day>
          <volume>10</volume>
          <fpage>12</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/24324853"/>
          </comment>
          <pub-id pub-id-type="doi">10.3352/jeehp.2013.10.12</pub-id>
          <pub-id pub-id-type="medline">24324853</pub-id>
          <pub-id pub-id-type="pii">jeehp-10-12</pub-id>
          <pub-id pub-id-type="pmcid">PMC3853868</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cassidy</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Marsh</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Holleran</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Ruhl</surname>
              <given-names>LS</given-names>
            </name>
          </person-group>
          <article-title>Methodology to improve data quality from chart review in the managed care setting</article-title>
          <source>Am J Manag Care</source>
          <year>2002</year>
          <month>09</month>
          <volume>8</volume>
          <issue>9</issue>
          <fpage>787</fpage>
          <lpage>93</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ajmc.com/pubMed.php?pii=205"/>
          </comment>
          <pub-id pub-id-type="medline">12234019</pub-id>
          <pub-id pub-id-type="pii">205</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Engel</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Henderson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fergenbaum</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Colantonio</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Medical record review conduction model for improving interrater reliability of abstracting medical-related information</article-title>
          <source>Eval Health Prof</source>
          <year>2009</year>
          <month>09</month>
          <day>13</day>
          <volume>32</volume>
          <issue>3</issue>
          <fpage>281</fpage>
          <lpage>98</lpage>
          <pub-id pub-id-type="doi">10.1177/0163278709338561</pub-id>
          <pub-id pub-id-type="medline">19679636</pub-id>
          <pub-id pub-id-type="pii">0163278709338561</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agbavor</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Predicting dementia from spontaneous speech using large language models</article-title>
          <source>PLOS Digit Health</source>
          <year>2022</year>
          <month>12</month>
          <day>22</day>
          <volume>1</volume>
          <issue>12</issue>
          <fpage>e0000168</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812634"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000168</pub-id>
          <pub-id pub-id-type="medline">36812634</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-22-00226</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mykowiecka</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marciniak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kupść</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Rule-based information extraction from patients' clinical data</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>10</month>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>923</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(09)00100-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2009.07.007</pub-id>
          <pub-id pub-id-type="medline">19646551</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(09)00100-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raffel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Narang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matena</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Exploring the limits of transfer learning with a unified text-to-text transformer</article-title>
          <source>J Mach Learn Res</source>
          <year>2020</year>
          <volume>21</volume>
          <issue>140</issue>
          <fpage>1</fpage>
          <lpage>67</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on Oct 11, 2018. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2018arXiv181004805D"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>1240</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Rawat</surname>
              <given-names>BPS</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Fine-tuning bidirectional encoder representations from transformers (BERT)-based models on large-scale electronic health record notes: an empirical study</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>09</month>
          <day>12</day>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>e14830</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/3/e14830/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14830</pub-id>
          <pub-id pub-id-type="medline">31516126</pub-id>
          <pub-id pub-id-type="pii">v7i3e14830</pub-id>
          <pub-id pub-id-type="pmcid">PMC6746103</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yilahun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hamdulla</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Biomedical named entity recognition based on MCBERT</article-title>
          <year>2022</year>
          <conf-name>2022 International Conference on Asian Language Processing (IALP)</conf-name>
          <conf-date>October 27-28, 2022</conf-date>
          <conf-loc>Singapore</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ialp57159.2022.9961297</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Use of BERT (bidirectional encoder representations from transformers)-based deep learning method for extracting evidences in Chinese radiology reports: development of a computer-aided liver cancer diagnosis framework</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>01</month>
          <day>12</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e19689</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e19689/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19689</pub-id>
          <pub-id pub-id-type="medline">33433395</pub-id>
          <pub-id pub-id-type="pii">v23i1e19689</pub-id>
          <pub-id pub-id-type="pmcid">PMC7837998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Multi-level representation learning for Chinese medical entity recognition: model development and validation</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>05</month>
          <day>04</day>
          <volume>8</volume>
          <issue>5</issue>
          <fpage>e17637</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/5/e17637/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17637</pub-id>
          <pub-id pub-id-type="medline">32364514</pub-id>
          <pub-id pub-id-type="pii">v8i5e17637</pub-id>
          <pub-id pub-id-type="pmcid">PMC7235813</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical information extraction applications: a literature review</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>01</month>
          <volume>77</volume>
          <fpage>34</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30256-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.11.011</pub-id>
          <pub-id pub-id-type="medline">29162496</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30256-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5771858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Polnaszek</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gilmore-Bykovskyi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hovanes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Roiland</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ferguson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kind</surname>
              <given-names>AJH</given-names>
            </name>
          </person-group>
          <article-title>Overcoming the challenges of unstructured data in multisite, electronic medical record-based abstraction</article-title>
          <source>Med Care</source>
          <year>2016</year>
          <month>10</month>
          <volume>54</volume>
          <issue>10</issue>
          <fpage>e65</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27624585"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MLR.0000000000000108</pub-id>
          <pub-id pub-id-type="medline">27624585</pub-id>
          <pub-id pub-id-type="pii">00005650-201610000-00012</pub-id>
          <pub-id pub-id-type="pmcid">PMC5024721</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tarik</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Sorin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Symons</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yaliraki</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Toni</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Extracting information from free text through unsupervised graph-based clustering: an application to patient incident records</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on Aug 31, 2019. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2019arXiv190900183T"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1909.00183</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Developing prompts from large language model for extracting clinical information from pathology and ultrasound reports in breast cancer</article-title>
          <source>Radiat Oncol J</source>
          <year>2023</year>
          <month>09</month>
          <volume>41</volume>
          <issue>3</issue>
          <fpage>209</fpage>
          <lpage>216</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37793630"/>
          </comment>
          <pub-id pub-id-type="doi">10.3857/roj.2023.00633</pub-id>
          <pub-id pub-id-type="medline">37793630</pub-id>
          <pub-id pub-id-type="pii">roj.2023.00633</pub-id>
          <pub-id pub-id-type="pmcid">PMC10556835</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Decker</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Trang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ramirez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Colley</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bongiovanni</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Wick</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Large language model-based chatbot vs surgeon-generated informed consent documentation for common procedures</article-title>
          <source>JAMA Netw Open</source>
          <year>2023</year>
          <month>10</month>
          <day>02</day>
          <volume>6</volume>
          <issue>10</issue>
          <fpage>e2336997</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37812419"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.36997</pub-id>
          <pub-id pub-id-type="medline">37812419</pub-id>
          <pub-id pub-id-type="pii">2810364</pub-id>
          <pub-id pub-id-type="pmcid">PMC10562939</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Warner</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Aneja</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bylund</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Using ChatGPT to evaluate cancer myths and misconceptions: artificial intelligence and cancer information</article-title>
          <source>JNCI Cancer Spectr</source>
          <year>2023</year>
          <month>03</month>
          <day>01</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>pkad015</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36929393"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jncics/pkad015</pub-id>
          <pub-id pub-id-type="medline">36929393</pub-id>
          <pub-id pub-id-type="pii">7078555</pub-id>
          <pub-id pub-id-type="pmcid">PMC10020140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cascella</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montomoli</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bellini</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Bignami</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the feasibility of ChatGPT in healthcare: an analysis of multiple clinical and research scenarios</article-title>
          <source>J Med Syst</source>
          <year>2023</year>
          <month>03</month>
          <day>04</day>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>33</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36869927"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10916-023-01925-4</pub-id>
          <pub-id pub-id-type="medline">36869927</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10916-023-01925-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC9985086</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Van Bulck</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Using ChatGPT and Google Bard to improve the readability of written patient information: a proof of concept</article-title>
          <source>Eur J Cardiovasc Nurs</source>
          <year>2024</year>
          <month>03</month>
          <day>12</day>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>122</fpage>
          <lpage>126</lpage>
          <pub-id pub-id-type="doi">10.1093/eurjcn/zvad087</pub-id>
          <pub-id pub-id-type="medline">37603843</pub-id>
          <pub-id pub-id-type="pii">7246857</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Giannakopoulos</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kavadella</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aaqel Salim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stamatopoulos</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kaklamanos</surname>
              <given-names>EG</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of the performance of generative AI large language models ChatGPT, Google Bard, and Microsoft Bing Chat in supporting evidence-based dentistry: comparative mixed methods study</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>12</month>
          <day>28</day>
          <volume>25</volume>
          <fpage>e51580</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e51580/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51580</pub-id>
          <pub-id pub-id-type="medline">38009003</pub-id>
          <pub-id pub-id-type="pii">v25i1e51580</pub-id>
          <pub-id pub-id-type="pmcid">PMC10784979</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thapa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Adhikari</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT, Bard, and large language models for biomedical research: opportunities and pitfalls</article-title>
          <source>Ann Biomed Eng</source>
          <year>2023</year>
          <month>12</month>
          <day>16</day>
          <volume>51</volume>
          <issue>12</issue>
          <fpage>2647</fpage>
          <lpage>2651</lpage>
          <pub-id pub-id-type="doi">10.1007/s10439-023-03284-0</pub-id>
          <pub-id pub-id-type="medline">37328703</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10439-023-03284-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sezgin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chekeni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Keim</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Clinical accuracy of large language models and Google search responses to postpartum depression questions: cross-sectional study</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>09</month>
          <day>11</day>
          <volume>25</volume>
          <fpage>e49240</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e49240/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/49240</pub-id>
          <pub-id pub-id-type="medline">37695668</pub-id>
          <pub-id pub-id-type="pii">v25i1e49240</pub-id>
          <pub-id pub-id-type="pmcid">PMC10520763</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Dissecting the runtime performance of the training, fine-tuning, and inference of large language models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on Nov 2023. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv231103687Z"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2311.03687</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A survey on large language model based autonomous agents</article-title>
          <source>arXiv</source>
          <volume>01</volume>
          <comment>Preprint posted online on Aug 22, 2023. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv230811432W"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2308.11432</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Schuurmans</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bosma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Chain-of-thought prompting elicits reasoning in large language models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on Jan 28, 2022</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2201.11903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yun</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ching</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Chain of natural language inference for reducing large language model ungrounded hallucinations</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on Oct 6, 2023. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv231003951L"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2310.03951</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schuurmans</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Narang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Self-consistency improves chain of thought reasoning in language models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on Mar 21, 2022. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2022arXiv220311171W"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2203.11171</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <article-title>ACOG committee opinion no. 743: low-dose aspirin use during pregnancy</article-title>
          <source>Obstet Gynecol</source>
          <year>2018</year>
          <month>07</month>
          <volume>132</volume>
          <issue>1</issue>
          <fpage>e44</fpage>
          <lpage>e52</lpage>
          <pub-id pub-id-type="doi">10.1097/AOG.0000000000002708</pub-id>
          <pub-id pub-id-type="medline">29939940</pub-id>
          <pub-id pub-id-type="pii">00006250-201807000-00057</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>National Institute for Health and Care Excellence</collab>
          </person-group>
          <article-title>National Institute for Health and Care Excellence (NICE)</article-title>
          <source>The Grants Register 2019</source>
          <year>2019</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>Palgrave Macmillan</publisher-name>
          <fpage>540</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Men</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Qwen Technical Report</article-title>
          <source>arXiv</source>
          <volume>01</volume>
          <comment>Preprint posted online on Sep 2023. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv230916609B"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2309.16609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Baichuan 2: open large-scale language models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on Sep 19, 2023. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv230910305Y"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2309.10305</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <source>OpenAI</source>
          <access-date>2024-03-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/blog/openai-api">https://openai.com/blog/openai-api</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chiang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Judging LLM-as-a-judge with MT-bench and chatbot arena</article-title>
          <source>arXiv</source>
          <volume>01</volume>
          <comment>Preprint posted online on Jun 9, 2023. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv230605685Z"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2306.05685</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <source>SNOMED International</source>
          <access-date>2024-03-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.ihtsdo.org/snomed-ct/">http://www.ihtsdo.org/snomed-ct/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clowse</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Myers</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>James</surname>
              <given-names>AH</given-names>
            </name>
          </person-group>
          <article-title>A national study of the complications of lupus in pregnancy</article-title>
          <source>Am J Obstet Gynecol</source>
          <year>2008</year>
          <month>08</month>
          <volume>199</volume>
          <issue>2</issue>
          <fpage>127.e1</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18456233"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ajog.2008.03.012</pub-id>
          <pub-id pub-id-type="medline">18456233</pub-id>
          <pub-id pub-id-type="pii">S0002-9378(08)00276-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC2542836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rees</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Doherty</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grainge</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lanyon</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>The worldwide incidence and prevalence of systemic lupus erythematosus: a systematic review of epidemiological studies</article-title>
          <source>Rheumatology (Oxford)</source>
          <year>2017</year>
          <month>11</month>
          <day>01</day>
          <volume>56</volume>
          <issue>11</issue>
          <fpage>1945</fpage>
          <lpage>1961</lpage>
          <pub-id pub-id-type="doi">10.1093/rheumatology/kex260</pub-id>
          <pub-id pub-id-type="medline">28968809</pub-id>
          <pub-id pub-id-type="pii">4079913</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hwang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of antiphospholipid syndrome in Korea: a nationwide population-based study</article-title>
          <source>J Korean Med Sci</source>
          <year>2020</year>
          <month>02</month>
          <day>10</day>
          <volume>35</volume>
          <issue>5</issue>
          <fpage>e35</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jkms.org/DOIx.php?id=10.3346/jkms.2020.35.e35"/>
          </comment>
          <pub-id pub-id-type="doi">10.3346/jkms.2020.35.e35</pub-id>
          <pub-id pub-id-type="medline">32030922</pub-id>
          <pub-id pub-id-type="pii">35.e35</pub-id>
          <pub-id pub-id-type="pmcid">PMC7008065</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Smoking in pregnancy: a cross-sectional study in China</article-title>
          <source>Tob Induc Dis</source>
          <year>2017</year>
          <month>07</month>
          <day>24</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>35</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tobaccoinduceddiseases.biomedcentral.com/articles/10.1186/s12971-017-0140-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12971-017-0140-0</pub-id>
          <pub-id pub-id-type="medline">28747859</pub-id>
          <pub-id pub-id-type="pii">140</pub-id>
          <pub-id pub-id-type="pmcid">PMC5525238</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Association of husband smoking with wife's hypertension status in over 5 million Chinese females aged 20 to 49 years</article-title>
          <source>J Am Heart Assoc</source>
          <year>2017</year>
          <month>03</month>
          <day>20</day>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>e004924</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ahajournals.org/doi/10.1161/JAHA.116.004924?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/JAHA.116.004924</pub-id>
          <pub-id pub-id-type="medline">28320748</pub-id>
          <pub-id pub-id-type="pii">JAHA.116.004924</pub-id>
          <pub-id pub-id-type="pmcid">PMC5524022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leon</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Balakrishna</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hohlfeld</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Odendaal</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zweigenthal</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Anstey Watkins</surname>
              <given-names>Jocelyn</given-names>
            </name>
            <name name-style="western">
              <surname>Daniels</surname>
              <given-names>Karen</given-names>
            </name>
          </person-group>
          <article-title>Routine Health Information System (RHIS) improvements for strengthened health system management</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2020</year>
          <month>08</month>
          <day>13</day>
          <volume>8</volume>
          <issue>8</issue>
          <fpage>CD012012</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32803893"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD012012.pub2</pub-id>
          <pub-id pub-id-type="medline">32803893</pub-id>
          <pub-id pub-id-type="pmcid">PMC8094584</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miñarro-Giménez</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Cornet</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jaulent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dewenter</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Thun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gøeg</surname>
              <given-names>Kirstine Rosenbeck</given-names>
            </name>
            <name name-style="western">
              <surname>Karlsson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Quantitative analysis of manual annotation of clinical text samples</article-title>
          <source>Int J Med Inform</source>
          <year>2019</year>
          <month>03</month>
          <volume>123</volume>
          <fpage>37</fpage>
          <lpage>48</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(18)30544-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2018.12.011</pub-id>
          <pub-id pub-id-type="medline">30654902</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(18)30544-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rawte</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chakraborty</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tonmoy</surname>
              <given-names>SMTI</given-names>
            </name>
            <name name-style="western">
              <surname>Chadha</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The troubling emergence of hallucination in large language models—an extensive definition, quantification, and prescriptive remediations</article-title>
          <source>arXiv</source>
          <volume>01</volume>
          <comment>Preprint posted online on Oct 8, 2023. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv231004988R"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2310.04988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Palangi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Simões</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chandrasekaran</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Mukherjee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mitra</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Teaching language models to hallucinate less with synthetic tasks</article-title>
          <source>arXiv</source>
          <volume>01</volume>
          <comment>Preprint posted online on Oct 10, 2023. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv231006827J"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2310.06827</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
