<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e56614</article-id>
      <article-id pub-id-type="pmid">38819879</article-id>
      <article-id pub-id-type="doi">10.2196/56614</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Redefining Health Care Data Interoperability: Empirical Exploration of Large Language Models in Information Exchange</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Tsafnat</surname>
            <given-names>Guy</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lee</surname>
            <given-names>Jisan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lee</surname>
            <given-names>Suehyun</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Yoon</surname>
            <given-names>Dukyong</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Systems Informatics</institution>
            <institution>Yonsei University College of Medicine</institution>
            <addr-line>50-1 Yonsei-ro Seodaemun-gu</addr-line>
            <addr-line>Seoul, 03722</addr-line>
            <country>Republic of Korea</country>
            <phone>82 31 5189 8450</phone>
            <email>dukyong.yoon@yonsei.ac.kr</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1635-8376</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Han</surname>
            <given-names>Changho</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4121-5465</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Dong Won</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9716-2767</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Songsoo</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6908-4324</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Bae</surname>
            <given-names>SungA</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1484-4645</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Ryu</surname>
            <given-names>Jee An</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-3868-9497</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Choi</surname>
            <given-names>Yujin</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0005-8204-7463</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Systems Informatics</institution>
        <institution>Yonsei University College of Medicine</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Institute for Innovation in Digital Healthcare (IIDH)</institution>
        <institution>Severance Hospital</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Center for Digital Health</institution>
        <institution>Yongin Severance Hospital</institution>
        <institution>Yonsei University Health System</institution>
        <addr-line>Yongin</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Cardiology</institution>
        <institution>Yongin Severance Hospital</institution>
        <institution>Yonsei University College of Medicine</institution>
        <addr-line>Yongin</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Dukyong Yoon <email>dukyong.yoon@yonsei.ac.kr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>5</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e56614</elocation-id>
      <history>
        <date date-type="received">
          <day>22</day>
          <month>1</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>22</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>22</day>
          <month>4</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>27</day>
          <month>4</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Dukyong Yoon, Changho Han, Dong Won Kim, Songsoo Kim, SungA Bae, Jee An Ryu, Yujin Choi. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 31.05.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e56614" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Efficient data exchange and health care interoperability are impeded by medical records often being in nonstandardized or unstructured natural language format. Advanced language models, such as large language models (LLMs), may help overcome current challenges in information exchange.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to evaluate the capability of LLMs in transforming and transferring health care data to support interoperability.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Using data from the Medical Information Mart for Intensive Care III and UK Biobank, the study conducted 3 experiments. Experiment 1 assessed the accuracy of transforming structured laboratory results into an unstructured format. Experiment 2 explored the conversion of diagnostic codes between the coding frameworks of the <italic>ICD-9-CM</italic> (<italic>International Classification of Diseases, Ninth Revision, Clinical Modification</italic>) and the Systematized Nomenclature of Medicine Clinical Terms (SNOMED-CT) using a traditional mapping table and a text-based approach facilitated by the LLM ChatGPT. Experiment 3 focused on extracting targeted information from unstructured records that included comprehensive clinical information (discharge notes).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The text-based approach showed a high conversion accuracy in transforming laboratory results (experiment 1) and an enhanced consistency in diagnostic code conversion, particularly for frequently used diagnostic names, compared with the traditional mapping approach (experiment 2). In experiment 3, the LLM showed a positive predictive value of 87.2% in extracting generic drug names.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study highlighted the potential role of LLMs in significantly improving health care data interoperability, demonstrated by their high accuracy and efficiency in data transformation and exchange. The LLMs hold vast potential for enhancing medical data exchange without complex standardization for medical terms and data structure.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>health care interoperability</kwd>
        <kwd>large language models</kwd>
        <kwd>medical data transformation</kwd>
        <kwd>data standardization</kwd>
        <kwd>text-based</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Efficient health care data exchange is essential in medicine, particularly in facilitating continuous care [<xref ref-type="bibr" rid="ref1">1</xref>]. Such data exchange becomes crucial when a patient uses multiple health care facilities or receives concurrent care, significantly influencing accurate treatment strategies. The emergence of personalized health care, becoming a cornerstone of modern medicine, necessitates the use of personal health records. This shift complicates data exchange processes as it demands the integration of data from multiple health care institutions, thereby posing substantial challenges [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Additionally, health care increasingly includes patient-generated health data (PGHD) from a diverse range of devices, including wearable technology, which further complicates data exchange given the heterogeneity of products from different vendors [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>Globally, health care systems contend with varying medical record formats and disparate coding systems. In the globalized health care paradigm, the mobility of patients across international boundaries introduces an added layer of complexity. The necessity for efficiently leveraging consolidated information from multiple nations escalates as international collaborative research broadens [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. The <italic>International Classification of Diseases</italic> (<italic>ICD</italic>) has served as a global standard for diagnostic nomenclature, whereas the Systematized Nomenclature of Medicine Clinical Terms (SNOMED-CT) presents a detailed, structured, and multiaxial medical terminology system, gaining adoption worldwide, including in the United States and Europe. Divergent drug coding systems also continue to exist between the United States and Europe, with the RxNorm system adopted in the United States and the ATC system used across Europe. These discrepancies underscore the urgent need for robust and effective health care data exchange pipelines.</p>
      <p>Over the years, significant attempts have been made toward the standardization of health care data amid notable challenges and limitations. Protocols, such as Health Level Seven International and Fast Healthcare Interoperability Resources (FHIR), have been introduced to enhance data exchange between medical devices and electronic health records [<xref ref-type="bibr" rid="ref9">9</xref>]. However, despite their use, these standards often meet with noncompliance or suboptimal implementation. Specifically, FHIR has received criticism for its inherent complexity, obstructing its widespread adoption [<xref ref-type="bibr" rid="ref10">10</xref>]. Moreover, a key obstacle in the exchange of health care data lies in the initial state of medical records, many of which are not stored following a universal standard. This inconsistency creates a significant challenge even before leveraging exchange protocols like Health Level Seven International and FHIR, designed to facilitate data sharing. The presence of standards does not automatically solve the issue of initiating the exchange when the starting point involves aligning diverse data formats.</p>
      <p>The Observational Health Data Sciences and Informatics initiative represents one of the most robust efforts toward data standardization for research purposes. This initiative has developed a common data model and promoted data standardization across various institutions in accordance with this format, significantly accelerating data analysis across institutions [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Nonetheless, the standardization process has its limitations. One is a notable risk of information loss from the original data during standardization [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Despite sustained global efforts to transition data into standardized formats, the inherent challenges of standardization inhibit complete conversion and representation of the finer details in the original data. Therefore, effective data standardization remains a pervasive challenge in health care data exchange.</p>
      <p>To address the challenges associated with data standardization, we attempted to explore alternatives beyond traditional approaches. A potential solution might be a system that supports flexible communication of raw data, for example, in natural language, permitting the end user to process and interpret data as required, thereby reducing the necessity for strict standardization. Large language models (LLMs), such as ChatGPT, which are designed to produce contextually relevant and coherent natural language responses based on input data, might be promising tools in this regard. Leveraging the capabilities of LLMs can enhance natural human interaction and streamline the management and summarization of extensive language-based data sets. Multiple studies have reported these potential applications of LLMs in the medical field; for example, mining medical text data for relevant clinical information, summarizing patient records and research findings, inferring medical outcomes from complex case histories, and reviewing medical literature to identify trends and validate clinical practices [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Consequently, if LLMs can proficiently transcribe patient data into text format and the receiving end can efficiently structure the resultant text data, then the intricate stages of data standardization may become redundant. This paradigm shift could significantly alter health care data exchange, heralding a future of seamless and universal data interoperability.</p>
      <p>This study tests the hypothesis that text-based conversion and integration of hospital data in different databases would be more effective than current methods. To test this, we focused on 3 aspects: accuracy of numerical data transformation into text and back, fidelity of text-based transformation for semantic data using <italic>ICD</italic> codes (ie, <italic>ICD-9-CM</italic> [<italic>International Classification of Diseases, Ninth Revision, Clinical Modification</italic>]), and effectiveness of extracting specific information, such as intensive care unit (ICU) medication details, during the transfer of text-format data. This study aims to demonstrate the potential of natural language–based systems for future health care data exchange.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was approved by the institutional review board of Yongin Severance Hospital (9-2023-0037) and was conducted in accordance with the Declaration of Helsinki. The requirement for written informed consent was waived due to the retrospective nature of the study.</p>
      </sec>
      <sec>
        <title>Data Sources</title>
        <p>This study used 2 comprehensive public health care data sets, namely, the UK Biobank and the Medical Information Mart for Intensive Care III (MIMIC-III). The UK Biobank serves as a notable national and international health resource, monitoring the lives of 500,000 voluntary participants aged between 40 and 69 years across the United Kingdom from 2006 to 2010. This resource aims to bolster the prevention, diagnosis, and treatment of a wide range of serious and life-threatening diseases. The data set includes genotypic and phenotypic data, covering medical, lifestyle, and environmental aspects. The UK Biobank contains structured data from diverse diagnostic tests, medical and family histories, and various physical measures. The MIMIC-III database, crafted by the Lab for Computational Physiology at MIT, is a broad, publicly available resource containing the deidentified health data of approximately 40,000 critical care patients [<xref ref-type="bibr" rid="ref18">18</xref>]. This data set includes demographic information, vital signs, laboratory tests, and medications, among other features. It is valued for its over 2 million free-text clinical notes, presenting a rich source of natural language medical data.</p>
        <p>This study used ChatGPT (version 3.5; OpenAI), an artificial intelligence model recognized for its exceptional performance among universally applicable models [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. Given that our primary aim was to assess the ability of LLMs to facilitate health care data exchange in general scenarios, we opted against fine-tuning the model to prevent overspecialization to specific data sets. As a result, we used ChatGPT (version 3.5) in its original form, without any modifications. Furthermore, our focus was on testing the accuracy of information extraction and transformation rather than the creativity of the language model. Therefore, in all experiments, we set the temperature to 0 to ensure a deterministic output from the model.</p>
        <p>The objectives of our study required the conduct of multiple trials featuring a range of prompts, a process termed prompt engineering. This process carries the potential risk of introducing an overfitting bias, which could boost the performance on specific data sets. Hence, we differentiated between the data used for prompt engineering experiments and those used to assess the performance of our experiments (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Given the absence of a standardized methodology for prompt engineering, researchers often carry out this process manually, relying on trial-and-error approaches based on experience.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Comparison of traditional standardization-based and proposed text-based flexible data exchange processes. This figure illustrates the difference between the conventional process of data exchange, which relies on standardization, and our suggested method of flexible data exchange leveraging unstructured, text-based data. The traditional approach necessitates standardization, potentially leading to the loss or distortion of original information, diminished adaptability in new settings, and an increase in the cost and effort required for data exchange. Our proposed text-based, flexible data exchange process avoids these issues by reducing the loss of original information and boosting adaptability. This method is expected to cut down both the cost and effort involved in data exchange. At the bottom, we have delineated the 3 stages that our experiments aimed to validate. ETL: Extract, Transform, Load; FHIR: Fast Healthcare Interoperability Resources; HL7: Health Level Seven International; LLM: large language model.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e56614_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Overview of the Experimental Design</title>
        <p>We hypothesized that converting a hospital’s data into text format and then integrating such data in another hospital’s database can be more accurate and comprehensive compared with other data transformation methods. To test this, we tested 3 key aspects (<xref rid="figure2" ref-type="fig">Figure 2</xref>). First, we investigated whether the original data could be accurately conveyed when transformed into text (experiment 1). This involved converting numerical data into text and back into numerical form to check for any deviations from the original data. Second, we sought to validate that text-based transformation of information with numerical and semantic meaning would result in less distortion compared with rule-based transformations (experiment 2). To this end, we experimented with converting <italic>ICD</italic>-based diagnostic codes into text and back, comparing this with the results of converting them to and from the SNOMED-CT coding system. Finally, we evaluated whether the receiving institution could accurately extract specific desired information during the transmission of complex medical information in text form to another institution (experiment 3). In this experiment, we assumed that the content would resemble a discharge summary when all aspects of a patient’s hospital stay were compiled into a text format. Therefore, we aimed to test whether specific data, such as medication information prescribed in the ICU, could be accurately extracted from these summaries. In this experiment, we specifically worked under the assumption that the information to be extracted would be medication information prescribed in the ICU. From the 3 experiments, we aimed to evaluate the possibility of our hypothesis: a potential solution for health care data exchange in the future might be a system that supports flexible communication of raw data, for example, in natural language (experiments 1 and 2), permitting the end user to process and interpret such data as required (experiment 3).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Overview of experimental approaches to evaluate LLM performance in data extraction and transformation. This figure outlines the methodologies used in experiments 1, 2, and 3. (A) In experiment 1, laboratory data from the UK Biobank data set were presented in natural language format and then restructured back into the MIMIC-III format using the LLM. The restructured data were then compared with the original laboratory data. (B) Experiment 2 evaluated the hypothesis that expressing diagnosis names in natural language might be more efficient than mapping them between varying coding systems. Diagnoses recorded in ICD-9-CM were rendered into natural language or SNOMED-CT, and then reverted into ICD-9-CM codes to examine the degree of information distortion. (C) In experiment 3, the LLM was assigned to extract targeted information concerning medications prescribed in the ICU from discharge summaries in the MIMIC-III database. The extracted information was then compared with the actual prescription records to assess the LLM’s accuracy in identifying and extracting details from unstructured text. ICD-9-CM: International Classification of Diseases, Ninth Revision, Clinical Modification; LLM: large language model; MIMIC-III: Medical Information Mart for Intensive Care III; SNOMED-CT: Systematized Nomenclature of Medicine Clinical Terms.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e56614_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Experiment 1: Evaluating Accuracy in Data Exchange via an LLM</title>
        <p>To evaluate the feasibility of data exchange using an LLM, we randomly selected laboratory test result data from 1000 individuals from the UK Biobank data set. For each individual, we gathered laboratory test results and converted them into an unstructured format. Subsequently, the data were restructured to comply with the MIMIC-III data architecture. The prompts used throughout this process are detailed in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
        <boxed-text id="box1" position="float">
          <title>Summary of prompts used in experiments 1, 2, and 3.</title>
          <p>
            <bold>Experiment 1</bold>
          </p>
          <p>Step 1: Translating laboratory test results into free text</p>
          <p>“I have the following patient. Based on this information, summarize the patient’s condition in natural language. Make sure to include all the information presented. The values of the lab results should remain numerical. (For the Sex variable, 0 = female and 1 = male.)”</p>
          <p>{List of lab results}</p>
          <p>Step 2: Transforming free text data into the structured format</p>
          <p>“I have the following patient.”</p>
          <p>{Generated text from the above step}</p>
          <p>“Extract and organize information on the following items.”</p>
          <p>(Add the value next to the variable name with no further explanation.)</p>
          <p>{Defined result extraction format}</p>
          <p>
            <bold>Experiment 2</bold>
          </p>
          <p>Step 1: Translating diagnosis codes to natural language text</p>
          <p>“I have a diagnosis called {Diagnosis code}.</p>
          <p>Describe it in natural language used by doctors and other health care professionals.</p>
          <p>Write it as a single phrase of only a few words (less than 15 words but do not use abbreviations).</p>
          <p>All semantics must be included.”</p>
          <p>Step 2: Translating natural language text to diagnosis codes</p>
          <p>“Where does {Descriptions on diagnosis} fit in the following categories?</p>
          <p>{Categories according to International Classification of Diseases, Ninth Revision, Clinical Modification (ICD-9-CM)}</p>
          <p>Provide the most appropriate ICD-9-CM code directly or choose one of the categories above.</p>
          <p>Choose only one answer that seems the most relevant and answer in the following format.</p>
          <p>The corresponding code: [Code (without periods): a description of the code].”</p>
          <p>
            <bold>Experiment 3</bold>
          </p>
          <p>Step 1: Extracting medication list from discharge summary</p>
          <p>role: “system,” “content:” Your role is to interpret medical records.</p>
          <p>role: “assistant,” “content:” I only need prescriptions from the ICU, not from the general ward or not from outside our hospital.</p>
          <p>Organize by ingredient name, not generic name.</p>
          <p>Never include medications on admission and discharge medications.</p>
          <p>Exclude information before ICU admission or after ICU discharge, even if it is for a hospital stay.</p>
          <p>In other words, exclude prescriptions that were written in a regular ward or emergency room.</p>
          <p>Exclude any medications that may not have been prescribed in the ICU.</p>
          <p>Finally, exclude all prescriptions for procedures, and tests. that are not prescriptions for medication.</p>
          <p>role: “user,” “content:” Observing the following patient record, organize a list of medications prescribed during the ICU visit.</p>
          <p>Organize them in the following format (Provide only the name, not the dose)</p>
          <p>drug name 1</p>
          <p>drug name 2</p>
          <p>If any information on the medications prescribed in the ICU is unavailable, simply answer “None.”</p>
          <p>Step 2: Converting drug names to ingredient names</p>
          <p>{extracted drug list from the above step}</p>
          <p>Organize the above medications by ingredient name.</p>
          <p>If the drug is recorded by trade name, replace it with the ingredient name.</p>
          <p>In the case of multiple ingredient names, record a representative one.</p>
          <p>The format should be a single line of ingredient names with no further explanation, like this</p>
          <p>List: Ingredient 1, Ingredient 2,...</p>
          <p>Step 3: Comparing extracted drug information with actual prescription records</p>
          <p>Here is the medication information extracted from the discharge summary.</p>
          <p>{extracted drug list from the above step}</p>
          <p>These are the medication details actually recorded in the prescription record.</p>
          <p>{Ingredient list from the above step}</p>
          <p>Organize the medication information extracted from the discharge summary by its actual inclusion in the prescription record.</p>
          <p>Medications not mentioned in the discharge summary should not be listed.</p>
          <p>The exact name of the medication may not be recorded, or a synonym may be used.</p>
          <p>In these cases, mark the medication as actually prescribed.</p>
          <p>For example, warfarin might be described as coumadin.</p>
          <p>Record the same medication under different names as the one that was prescribed.</p>
          <p>Match the same ingredient even if the added bases differ.</p>
          <p>For example, the ingredient name of Lopressor is Metoprolol tartrate, but the ingredient must be confirmed as “true” even if it is Metoprolol.</p>
          <p>Ingredient names may be written as abbreviations. For example, acetaminophen may be written as APAP.</p>
          <p>Exclude P.R.N. prescriptions.</p>
          <p>Exclude simple fluid prescriptions.</p>
          <p>Provide only “true” or “false” information for each drug.</p>
          <p>Do not provide Python code. Provide only the results in an array.</p>
          <p>Fill in the blanks with a “true” or “false” result in the following format</p>
          <p>{Defined result extraction format}</p>
        </boxed-text>
        <p>After the conversion to MIMIC-III data format via the LLM, we checked for potential omissions of information and any discrepancies in numerical values. We assessed the absence or presence of data omissions using sensitivity, specificity, and positive and negative predictive values. To assess the accuracy of the conversion, we used values transformed manually as the reference standard. Sensitivity indicated whether information from the original data set also existed in the transformed data. Conversely, specificity pertained to whether data absent in the original were also absent in the transformed data. The positive predictive value (PPV) referred to whether data present in the transformed data also existed in the original, whereas the negative predictive value determined whether data absent in the transformed data were also absent in the original. Numerical discrepancies were calculated only for test results presented in numerical format. They were assessed via the computation of the mean squared error between the original and transformed values.</p>
      </sec>
      <sec>
        <title>Experiment 2: Evaluating Possible Information Distortion During Conversion of Diagnosis Codes</title>
        <p>In this experiment, we explored a scenario of diagnostic codes from the primary data set undergoing transformation for sharing across different institutions or to diverse end users. We aimed to clarify potential discrepancies emerging from transitions between the original and an alternate coding framework. Initially, we used a code-mapping table to facilitate the transition from one coding system to another. Subsequently, we reverted the transformed codes to the original coding framework, and then quantified discrepancies by comparing the reverted data against the primary data set. Using the MIMIC-III database, we converted diagnoses encoded in <italic>ICD-9-CM</italic> to SNOMED-CT, and subsequently reverted the same to <italic>ICD-9-CM</italic>. This conversion was based on a mapping table from a previous study [<xref ref-type="bibr" rid="ref22">22</xref>]. Our proposed approach primarily leveraged the capabilities of the LLM, converting the primary coding structure into a natural text format. For a comparative analysis with the traditional approach, we recoded the text-converted diagnoses into the primary coding system (<italic>ICD-9-CM</italic>) using the LLM, as illustrated in <xref rid="figure2" ref-type="fig">Figure 2</xref>B. However, for this experiment, we excluded E and V codes (supplementary classifications for external causes of injury).</p>
        <p>In assessing the accuracy of the restoration of diagnostic codes, we conducted evaluations based on the depth of the <italic>ICD-9-CM</italic> coding system. The highest level was labeled level 1, with each subsequent, more specific layer labeled level 2, level 3, and so forth. For instance, if the original data had been coded as “401.1 Hypertension, benign” but the restored data were denoted as “401.9 Hypertension, unspecified,” then the evaluation would be a mismatch at level 3. However, at level 2 granularity (ie, “401. Hypertension”), the codes were considered matching.</p>
      </sec>
      <sec>
        <title>Experiment 3: Assessing the Efficacy of LLMs in Extracting Targeted Information From Unstructured Medical Records</title>
        <p>To evaluate the capability of our model in extracting targeted medical information from unstructured text, we selected narrative-style discharge summaries from the NOTEEVENTS table of the MIMIC-III database, based on the assumption that they would reflect the comprehensive format typical of patient summaries transmitted between hospitals. These summaries provide a comprehensive account of a patient’s stay in the ICU, including clinicians’ assessments, patient medical history, laboratory results, interpretations of medical imaging, prescriptions, and ensuing care plans. This data set presents a detailed array of narrative insights that illustrate the complexities of patient care, diagnostics, and therapeutic strategies within the ICU context.</p>
        <p>For this experiment, we specifically extracted discharge summaries documented by clinicians. These summaries encapsulated patient diagnoses, vital sign readings, current medication regimens, and other relevant status updates, all expressed in natural language. The prompts used in this process are presented in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
        <p>To evaluate the performance, we compared the information extracted from natural language with the information stored in structured tables. For this assessment, we made a random selection of 1000 discharge summaries, and we used structured data—prescription records—to verify the accuracy of the information retrieved through the LLM. Our focus was on assessing the PPV, representing the precision of the information extracted by the LLM. The extracted information was considered correct if it was also present within the structured data; otherwise, it was classified as incorrect. Notably, not all prescriptions are routinely documented in natural language by clinicians. Generally, only therapeutics significantly influencing the patient’s clinical status would be transcribed in the notes. As such, calculating the negative predictive value (ie, the number of medications not mentioned in the narrative notes that were actually not administered) was deemed impracticable. Similarly, sensitivity (ie, the degree to which prescribed medications are documented in narrative notes) and specificity (ie, the extent to which nonprescribed medications are not mentioned in narrative notes) could not be reliably estimated.</p>
      </sec>
      <sec>
        <title>Software Used</title>
        <p>We accessed ChatGPT (version 3.5) via its API. We used Google BigQuery to manage and deploy the MIMIC-III and UK Biobank data sets. We used Python for certain tasks, such as assessing model performance.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Features of the Extracted Data</title>
        <p>In experiment 1, we used the lab test results of 1000 individuals randomly selected from the UK Biobank data set. For experiment 2, we used all diagnosis codes recorded within the MIMIC-III database. Finally, we used 1000 discharge summaries extracted randomly from the MIMIC-III database for experiment 3. <xref ref-type="table" rid="table1">Table 1</xref> presents a detailed summary of the data used across all experiments.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Summary of data used in each experiment.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="370"/>
            <col width="200"/>
            <col width="250"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Experiment 1</td>
                <td>Experiment 2</td>
                <td>Experiment 3</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Database</td>
                <td>UK Biobank</td>
                <td>MIMIC-III<sup>a</sup></td>
                <td>MIMIC-III</td>
              </tr>
              <tr valign="top">
                <td>Data type</td>
                <td>Laboratory test results</td>
                <td>Diagnosis code (<italic>ICD-9-CM</italic><sup>b</sup>)</td>
                <td>Discharge summary</td>
              </tr>
              <tr valign="top">
                <td>Number of records</td>
                <td>502,396</td>
                <td>651,047</td>
                <td>59,652</td>
              </tr>
              <tr valign="top">
                <td>Number of patients</td>
                <td>502,396</td>
                <td>46,520</td>
                <td>41,127</td>
              </tr>
              <tr valign="top">
                <td>Age (years), mean (SD)</td>
                <td>56.53 (8.09)</td>
                <td>64.43 (57.20)</td>
                <td>58.35 (53.63)</td>
              </tr>
              <tr valign="top">
                <td>Sex (male), n (%)</td>
                <td>229,079 (45.6)</td>
                <td>26,121 (56.2)</td>
                <td>23,199 (56.4)</td>
              </tr>
              <tr valign="top">
                <td>Length of text (number of characters), mean (SD)</td>
                <td>N/A<sup>c</sup></td>
                <td>N/A</td>
                <td>9618.92 (5539.64)</td>
              </tr>
              <tr valign="top">
                <td>Number of tests</td>
                <td>11,973</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Number of diagnosis codes</td>
                <td>N/A</td>
                <td>6984</td>
                <td>N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>MIMIC-III: Medical Information Mart for Intensive Care III.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>ICD-9-CM: International Classification of Diseases, Ninth Revision, Clinical Modification.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Results of Experiment 1: Efficiency of the LLM in Data Transformation and Retrieval</title>
        <p>In experiment 1, our objective was to assess the capability of the LLM in transforming and extracting laboratory results. We randomly selected the laboratory results of 1000 individuals from an initial data set of 502,396 individuals. This resulted in 11,996 data points spanning 13 distinct test items (excluding tests with null results). These data points were subsequently translated into natural language. Remarkably, only 23 items were lost during the transformation process, with 11,973 (99.8%) being successfully converted. Among the transformed data, 24 items did not match their original values perfectly. However, upon closer examination of these discrepancies, all inconsistencies were found to stem from the rounding off of decimal values. For instance, an original BMI value of 24.4383 was translated as 24.44. Consequently, the calculated mean squared error was a minimal 1.76×10<sup>–7</sup>. <xref ref-type="table" rid="table2">Table 2</xref> provides a comprehensive summary of errors for each laboratory test.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Summary of experimental results from data transformation and extraction using LLM in experiment 1.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="80"/>
            <col width="90"/>
            <col width="80"/>
            <col width="110"/>
            <col width="190"/>
            <col width="190"/>
            <col width="100"/>
            <thead>
              <tr valign="bottom">
                <td>Variable</td>
                <td colspan="2">Raw data</td>
                <td colspan="2">After transformation</td>
                <td>Number of data not transferred during the transformation process</td>
                <td>Number of data with changed values during the transformation process</td>
                <td>MSE<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>n (%)</td>
                <td>Mean (SD)</td>
                <td>n (%)</td>
                <td>Mean (SD)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Age</td>
                <td>1000 (100)</td>
                <td>56.94 (8.03)</td>
                <td>1000 (100)</td>
                <td>56.94 (8.03)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Sex</td>
                <td>1000 (100)</td>
                <td>0.47 (0.5)</td>
                <td>1000 (100)</td>
                <td>0.47 (0.5)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>BMI</td>
                <td>994 (100)</td>
                <td>27.04 (4.78)</td>
                <td>994 (100)</td>
                <td>27.04 (4.78)</td>
                <td>0</td>
                <td>24</td>
                <td>2.12×10<sup>–6</sup></td>
              </tr>
              <tr valign="top">
                <td>ALT<sup>b</sup></td>
                <td>919 (100)</td>
                <td>23.55 (15.32)</td>
                <td>919 (100)</td>
                <td>23.55 (15.32)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>AST<sup>c</sup></td>
                <td>918 (100)</td>
                <td>26.13 (11.21)</td>
                <td>918 (100)</td>
                <td>26.13 (11.21)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Bilirubin</td>
                <td>772 (100)</td>
                <td>1.84 (0.81)</td>
                <td>772 (100)</td>
                <td>1.84 (0.81)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Creatinine</td>
                <td>920 (100)</td>
                <td>72.94 (18.65)</td>
                <td>907 (98.6)</td>
                <td>72.85 (18.69)</td>
                <td>13</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>GGT<sup>d</sup></td>
                <td>920 (100)</td>
                <td>39.06 (46.96)</td>
                <td>920 (100)</td>
                <td>39.06 (46.96)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>HbA<sub>1c</sub><sup>e</sup></td>
                <td>930 (100)</td>
                <td>35.88 (5.66)</td>
                <td>930 (100)</td>
                <td>35.88 (5.66)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>HDL<sup>f</sup></td>
                <td>846 (100)</td>
                <td>1.46 (0.38)</td>
                <td>846 (100)</td>
                <td>1.46 (0.38)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>LDL<sup>g</sup></td>
                <td>915 (100)</td>
                <td>3.54 (0.88)</td>
                <td>915 (100)</td>
                <td>3.54 (0.88)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Platelet count</td>
                <td>943 (100)</td>
                <td>255.39 (59.79)</td>
                <td>943 (100)</td>
                <td>255.39 (59.79)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Triglycerides</td>
                <td>919 (100)</td>
                <td>1.73 (1.04)</td>
                <td>909 (98.9)</td>
                <td>1.74 (1.05)</td>
                <td>10</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>11,996 (100)</td>
                <td>42.9 (69.66)</td>
                <td>11,973 (99.8)</td>
                <td>42.9 (69.71)</td>
                <td>23</td>
                <td>24</td>
                <td>1.76×10<sup>–7</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>MSE: mean squared error.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>ALT: alanine transaminase.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>AST: aspartate transaminase.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>GGT: gamma-glutamyl transferase.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>HbA<sub>1c</sub>: hemoglobin A<sub>1c</sub>.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>HDL: high-density lipoprotein.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>LDL: low-density lipoprotein.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Results of Experiment 2: Analysis of Diagnostic Code Conversion (Mapping Table vs Text-Based Methods)</title>
        <p>In the conversion, diagnostic codes were adapted based on a mapping table. Specifically, the original <italic>ICD-9-CM</italic> codes transitioned through SNOMED-CT before being remapped to <italic>ICD-9-CM</italic>. During this procedure, 5748 diagnostic codes expanded to 218,088 codes. This expansion may be attributed to the fact that specific mapping codes do not always allow for a direct 1:1 representation, leading to a 1:n relationship owing to challenges in semantic translation. As an illustration, the <italic>ICD-9-CM</italic> code for “Malignant pleural effusion: Malignant pleural effusion (51181)” was mapped as 2 distinct codes in SNOMED-CT: “Malignant pleural effusion (363346000)” and “Pleural effusion owing to malignant neoplastic disease (disorder) (860792009).” However, when converting through text, the mapping was nearly direct with a 1:1 ratio, ensuring that the 5748 original codes corresponded to 5748 records.</p>
        <p>Assessing the results before and after the conversion, we found that the mapping table achieved the following consistency values: 0.096 (21,000/218,088), 0.248 (54,068/218,088), and 0.626 (136,431/218,088) at levels 3, 2, and 1, respectively. Conversely, when relying on text-based methods, the consistency was higher, with corresponding values of 0.597 (3430/5748), 0.844 (4850/5748), and 0.904 (5197/5748) for the same levels. An important observation regarding the accuracy of conversion in relation to frequency of use was that as the frequency increased, accuracy followed suit. Specifically, the top 1000 diagnostic names, based on their frequency, achieved values of 0.733, 0.896, and 0.918 at levels 3, 2, and 1, respectively, outperforming less common names. This observed relation was linear, as demonstrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>. These results suggested that the frequent use of diagnostic names may provide better precision when shared between different databases.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Results of converting diagnoses from ICD-9-CM (International Classification of Diseases, Ninth Revision, Clinical Modification) coding to natural language and back to ICD-9-CM. At the highest level, Level 1, most information aligned closely with the original data. However, accuracy decreased as the categories became more specific at levels 2 and 3. Notably, more frequently used diagnoses (toward the left on the x-axis) showed higher conversion accuracy.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e56614_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>During a review of the misclassified instances, we identified several cases as errors based on our evaluation standards. Notably, the semantic core of the original and converted phrases remained largely consistent. For example, we observed a transformation from “51881: Acute respiratory failure” to “78609: Respiratory abnorm NEC: Other respiratory abnormalities.” A comprehensive list of these misclassifications is provided in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>Results of Experiment 3: Effectiveness of the LLM in Extracting Relevant Information From Medical Records</title>
        <p>In reviewing 1000 discharge summaries, the LLM identified a total of 5604 instances of medication prescriptions within the ICU setting. Of these, 2483 perfectly matched the entries in the prescription table, resulting in a PPV of 44.3%. When evaluated based on the shared active ingredient, we found a higher level of agreement, with 5055 out of the 5604 (90.2%) prescriptions showing alignment (<xref ref-type="table" rid="table3">Table 3</xref>). These findings, as exemplified by instances where “Acetaminophen” in the prescription information was referred to as “Paracetamol” in the discharge summaries and cases where “Metoprolol Tartrate” was simply documented as “Metoprolol,” underscore the tendency of physicians to note down familiar medication names. This behavior occurs instead of strictly adhering to the terminology prescribed in the prescription database. These examples highlight a preference for more universally recognized or familiar terms over the precise terminology listed in medical records. Despite this inherent variability in naming conventions, the LLM showed significant effectiveness in identifying and extracting the necessary information.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of drug information extracted from natural language discharge summaries with prescription records.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="580"/>
            <col width="420"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Number of medications, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Medications in the intensive care unit extracted from the discharge summary</td>
                <td>5604 (100)</td>
              </tr>
              <tr valign="top">
                <td>Medications that exactly matched the prescription name</td>
                <td>2483 (44.3)</td>
              </tr>
              <tr valign="top">
                <td>Medications semantically matched by large language model, including synonyms</td>
                <td>5055 (90.2)<sup>a</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>A total of 2572 medications were described using different terminology than the prescription.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>Our research highlighted a new direction in health care by demonstrating the effective use of LLMs in medical data exchange. We aimed to overcome the current challenges related to data sharing among health care institutions, particularly owing to the unstructured nature of several medical records. We successfully validated all the key aspects we aimed to investigate, demonstrating the efficacy of our approach in enhancing health care data interoperability. The experiments revealed that converting hospital data into text format and subsequently integrating the converted data into another hospital’s database was not only feasible but also more accurate and comprehensive compared with traditional data transformation methods. Notably, our findings confirmed that the original data retained their accuracy and integrity when transformed into and back from the text format, a crucial factor in health care where precision is paramount. Moreover, our results indicated that text-based transformation, particularly for semantically rich information such as <italic>ICD</italic>-based diagnostic codes, resulted in significantly less distortion compared with rule-based methods. Finally, our method effectively enhanced medical data exchange by enabling precise extraction of specific information, such as ICU medication details, from text-transmitted data, thus bolstering health care systems’ efficiency in integrating such data.</p>
      <p>Our study highlights the significant role of LLMs in the field of health care informatics, demonstrating their transformative ability to manage, interpret, and share large volumes of medical data. Traditional data standardization methods, while important, have often been slow and challenging, creating barriers to fast and efficient data exchange. Our results showed that LLMs can not only interpret unstructured data but also convert it into easily understandable formats, greatly reducing the need for time-consuming standardization and allowing for faster data transfer.</p>
      <p>Furthermore, the impact of our research extends beyond the clinical or institutional settings, affecting the broader area of personal health records. Integrating data from multiple providers into a single, unified record has always been a complex task. Different institutions often use varied formats, terminologies, and standards. Our work with LLMs suggested that these models can simplify this integration process. By understanding, transforming, and combining different data sources, LLMs can improve data sharing and enrich the information available.</p>
      <p>LLMs’ adaptability in processing and interpreting structured and unstructured data hints at their potential to significantly enhance the handling of PGHD. Given the variety and unstructured nature of PGHD, from health diaries to wearable technology outputs, our findings suggest a promising avenue for applying LLMs to integrate and understand these diverse data sources more effectively. This capability aligns with our current results. Moreover, it opens up new pathways for creating more personalized and comprehensive approaches to patient care, leveraging the vast and untapped resources of PGHD.</p>
      <p>Our study also provided significant insights into the process of converting diagnostic codes between standard coding systems, such as <italic>ICD-9-CM</italic> and SNOMED-CT. The higher number of diagnostic codes produced through this conversion process highlights the detailed and comprehensive nature of code capture enabled by the LLM. However, the approximate 1:1 ratio achieved in text-based conversions points to a more accurate and straightforward method. Importantly, these text-based conversions emphasize the major advantage of keeping the accuracy of the original data. For frequently used diagnostic terms, this method ensured that the core information from the original data remained consistent. Our examination of misclassifications revealed that, although identified as errors based on our criteria, several converted codes maintained similarity in their underlying meaning. Thus, despite “errors” in conversion, the core medical information is typically retained. Moreover, the direct relationship between the accuracy of conversion and frequency of diagnostic names hints at a possible inherent alignment of standard coding systems with commonly used terms. Our findings highlighted the critical importance of preserving data accuracy when moving between detailed medical coding systems. This aligns with the findings of previous studies, which suggest that using LLMs can lead to more accurate phenotype extraction from medical data [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>].</p>
      <p>Our findings have implications beyond individual health care systems and emphasize the potential for a significant change in the global health care landscape. Our data revealed that using LLMs can enhance international health information exchanges. Such improved communication can lead to better collaboration between countries, potentially benefiting patient care worldwide by ensuring that medical knowledge and practices are more consistently applied. Furthermore, our research points to a new direction in the design and operation of electronic medical record systems. The ability of LLMs to efficiently process and structure natural language data can make extracting, analyzing, and presenting medical data more straightforward. This not only allows for immediate analyses using the latest data but also promotes a more adaptable environment within electronic medical record systems to meet the dynamic needs of the health care sector, as illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
      <p>While our study demonstrates the promising capabilities of LLMs in medical data processing, it is not without limitations. In this study, we used the GPT-3.5 model. Notably, using the newer GPT-4 might lead to better results, given that the efficiency of LLMs is continually improving. Comparative studies have demonstrated that GPT-4 performs better than its predecessors in various domains [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. This progress in language model capabilities indicates the ongoing advancements we can expect. In addition to technological considerations, our reliance on specific data sets such as MIMIC-III and the UK Biobank, while providing valuable insights, introduces limitations regarding representativeness across diverse health care environments and languages. These data sets, representing particular health care settings and populations, may not fully encapsulate the complexity and diversity of global medical practices, especially in non–English-speaking countries. This aspect underscores the necessity for broader research in applying LLMs across more varied data sets to ensure generalizability and applicability to different health care contexts. Regarding technological improvements, on-premise solutions can be expected to continue to improve in capabilities. Hence, our research serves as a foundation, showing the feasibility of data exchange based on LLMs. The accuracy and use of these transformations will be enhanced further in future versions. For institutions concerned with security implications, transitioning from externally provided models, such as ChatGPT, to an on-premise, self-built language model is a recommended strategy. Custom-built models can match the performance of GPT-3.5 for specific tasks [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Our choice to evaluate performance using the 3.5 version in this research provides a reference point and offers guidance for users considering the use of their custom language models.</p>
      <p>Our research focused on specific data sets, and more extensive studies involving a wider range of data would be needed to confirm our initial observations. Moreover, the ability of LLMs to handle different types of unstructured data, each with its unique challenges, requires thorough assessment. Nevertheless, with ongoing advancements in artificial intelligence and machine learning, we expect that these challenges will be addressed, and the efficiency of LLMs in managing medical data will continue to improve. Future versions of LLMs, combined with careful validation, can bring significant improvements to health care informatics.</p>
      <sec>
        <title>Conclusions</title>
        <p>In conclusion, our in-depth study provides important insights into the potential transformation of health care data exchange in the near future. LLMs have a significant role in enhancing medical data sharing, ensuring both precision and efficiency. As technology advances and these language models become more refined, their role in health care data management and communication is anticipated to expand. Their potential goes beyond merely simplifying processes; they might also play a key role in minimizing errors, guaranteeing that medical professionals worldwide can access accurate and timely data. Ultimately, our findings suggest that with the incorporation of LLMs, the global health care landscape could become more unified, facilitating seamless knowledge transfer and collaboration among health care providers everywhere.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>A comprehensive list of misclassifications in experiment 2.</p>
        <media xlink:href="jmir_v26i1e56614_app1.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 38 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">FHIR</term>
          <def>
            <p>Fast Healthcare Interoperability Resources</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ICD-9-CM</term>
          <def>
            <p>International Classification of Diseases, Ninth Revision, Clinical Modification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ICU</term>
          <def>
            <p>intensive care unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MIMIC-III</term>
          <def>
            <p>Medical Information Mart for Intensive Care III</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PGHD</term>
          <def>
            <p>patient-generated health data</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SNOMED-CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine Clinical Terms</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by a grant of the Korea Health Technology R&amp;D (Research and Development) Project through the Korea Health Industry Development Institute (KHIDI), funded by the Ministry of Health and Welfare, Republic of Korea (grant number HI22C0452). This study was conducted using data from UK Biobank (application number 85037).</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets analyzed during this study are available in the UK Biobank [<xref ref-type="bibr" rid="ref29">29</xref>] and MIMIC-III [<xref ref-type="bibr" rid="ref30">30</xref>] repositories.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vest</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Gamm</surname>
              <given-names>LD</given-names>
            </name>
          </person-group>
          <article-title>Health information exchange: persistent challenges and new strategies</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>288</fpage>
          <lpage>294</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20442146"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2010.003673</pub-id>
          <pub-id pub-id-type="medline">20442146</pub-id>
          <pub-id pub-id-type="pii">17/3/288</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995716</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Archer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Fevrier-Thomas</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Lokker</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McKibbon</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Straus</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>Personal health records: a scoping review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>4</issue>
          <fpage>515</fpage>
          <lpage>522</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21672914"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000105</pub-id>
          <pub-id pub-id-type="medline">21672914</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000105</pub-id>
          <pub-id pub-id-type="pmcid">PMC3128401</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Ash</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Overhage</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Sands</surname>
              <given-names>DZ</given-names>
            </name>
          </person-group>
          <article-title>Personal health records: definitions, benefits, and strategies for overcoming barriers to adoption</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2006</year>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>121</fpage>
          <lpage>126</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/16357345"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M2025</pub-id>
          <pub-id pub-id-type="medline">16357345</pub-id>
          <pub-id pub-id-type="pii">M2025</pub-id>
          <pub-id pub-id-type="pmcid">PMC1447551</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Celik</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Godfrey</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Bringing it all together: wearable data fusion</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>149</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nature.com/articles/s41746-023-00897-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00897-6</pub-id>
          <pub-id pub-id-type="medline">37591989</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00897-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC10435508</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kruse</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Argueta</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Patient and provider attitudes toward the use of patient portals for the management of chronic disease: a systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <volume>17</volume>
          <issue>2</issue>
          <fpage>e40</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/2/e40/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3703</pub-id>
          <pub-id pub-id-type="medline">25707035</pub-id>
          <pub-id pub-id-type="pii">v17i2e40</pub-id>
          <pub-id pub-id-type="pmcid">PMC4376181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Asch</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Volpp</surname>
              <given-names>KG</given-names>
            </name>
          </person-group>
          <article-title>Wearable devices as facilitators, not drivers, of health behavior change</article-title>
          <source>JAMA</source>
          <year>2015</year>
          <volume>313</volume>
          <issue>5</issue>
          <fpage>459</fpage>
          <lpage>460</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2014.14781</pub-id>
          <pub-id pub-id-type="medline">25569175</pub-id>
          <pub-id pub-id-type="pii">2089651</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Voss</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Makadia</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Matcho</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Knoll</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>DeFalco</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Londhe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
          </person-group>
          <article-title>Feasibility and utility of applications of the common data model to multiple, disparate observational health databases</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>553</fpage>
          <lpage>564</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25670757"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocu023</pub-id>
          <pub-id pub-id-type="medline">25670757</pub-id>
          <pub-id pub-id-type="pii">ocu023</pub-id>
          <pub-id pub-id-type="pmcid">PMC4457111</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Overhage</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Hartzema</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Stang</surname>
              <given-names>PE</given-names>
            </name>
          </person-group>
          <article-title>Validation of a common data model for active safety surveillance research</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>54</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22037893"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000376</pub-id>
          <pub-id pub-id-type="medline">22037893</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000376</pub-id>
          <pub-id pub-id-type="pmcid">PMC3240764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saripalle</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Runyan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Russell</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Using HL7 FHIR to achieve interoperability in patient health record</article-title>
          <source>J Biomed Inform</source>
          <year>2019</year>
          <volume>94</volume>
          <fpage>103188</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(19)30106-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2019.103188</pub-id>
          <pub-id pub-id-type="medline">31063828</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(19)30106-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mandel</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Kreda</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Mandl</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>IS</given-names>
            </name>
            <name name-style="western">
              <surname>Ramoni</surname>
              <given-names>RB</given-names>
            </name>
          </person-group>
          <article-title>SMART on FHIR: a standards-based, interoperable apps platform for electronic health records</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <volume>23</volume>
          <issue>5</issue>
          <fpage>899</fpage>
          <lpage>908</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26911829"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv189</pub-id>
          <pub-id pub-id-type="medline">26911829</pub-id>
          <pub-id pub-id-type="pii">ocv189</pub-id>
          <pub-id pub-id-type="pmcid">PMC4997036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suchard</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Krumholz</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Madigan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
          </person-group>
          <article-title>Comprehensive comparative effectiveness and safety of first-line antihypertensive drug classes: a systematic, multinational, large-scale analysis</article-title>
          <source>Lancet</source>
          <year>2019</year>
          <volume>394</volume>
          <issue>10211</issue>
          <fpage>1816</fpage>
          <lpage>1826</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31668726"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(19)32317-7</pub-id>
          <pub-id pub-id-type="medline">31668726</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(19)32317-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6924620</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
          <pub-id pub-id-type="medline">22549152</pub-id>
          <pub-id pub-id-type="pii">nrg3208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiskopf</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Methods and dimensions of electronic health record data quality assessment: enabling reuse for clinical research</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>144</fpage>
          <lpage>151</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22733976"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000681</pub-id>
          <pub-id pub-id-type="medline">22733976</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000681</pub-id>
          <pub-id pub-id-type="pmcid">PMC3555312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fink</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Bischoff</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fink</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Moll</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kroschke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dulz</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Heußel</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Kauczor</surname>
              <given-names>HU</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>TF</given-names>
            </name>
          </person-group>
          <article-title>Potential of ChatGPT and GPT-4 for data mining of free-text CT reports on lung cancer</article-title>
          <source>Radiology</source>
          <year>2023</year>
          <volume>308</volume>
          <issue>3</issue>
          <fpage>e231362</fpage>
          <pub-id pub-id-type="doi">10.1148/radiol.231362</pub-id>
          <pub-id pub-id-type="medline">37724963</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Idnay</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Nestor</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Soroush</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Elias</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Durrett</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rousseau</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Evaluating large language models on medical evidence summarization</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>158</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nature.com/articles/s41746-023-00896-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00896-7</pub-id>
          <pub-id pub-id-type="medline">37620423</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00896-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC10449915</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Paget</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Naugler</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Automated paper screening for clinical reviews using large language models: data analysis study</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <volume>26</volume>
          <fpage>e48996</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e48996/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/48996</pub-id>
          <pub-id pub-id-type="medline">38214966</pub-id>
          <pub-id pub-id-type="pii">v26i1e48996</pub-id>
          <pub-id pub-id-type="pmcid">PMC10818236</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chan You</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Bae</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of GPT-4 for 10-year cardiovascular risk prediction: insights from the UK Biobank and KoGES data</article-title>
          <source>iScience</source>
          <year>2024</year>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>109022</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-0042(24)00243-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.isci.2024.109022</pub-id>
          <pub-id pub-id-type="medline">38357664</pub-id>
          <pub-id pub-id-type="pii">S2589-0042(24)00243-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10865411</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AEW</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LWH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nature.com/articles/sdata201635"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT: the future of discharge summaries?</article-title>
          <source>Lancet Digit Health</source>
          <year>2023</year>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>e107</fpage>
          <lpage>e108</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(23)00021-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(23)00021-3</pub-id>
          <pub-id pub-id-type="medline">36754724</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(23)00021-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Biswas</surname>
              <given-names>SS</given-names>
            </name>
          </person-group>
          <article-title>Role of Chat GPT in public health</article-title>
          <source>Ann Biomed Eng</source>
          <year>2023</year>
          <volume>51</volume>
          <issue>5</issue>
          <fpage>868</fpage>
          <lpage>869</lpage>
          <pub-id pub-id-type="doi">10.1007/s10439-023-03172-7</pub-id>
          <pub-id pub-id-type="medline">36920578</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10439-023-03172-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Dis</surname>
              <given-names>EAM</given-names>
            </name>
            <name name-style="western">
              <surname>Bollen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zuidema</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>van Rooij</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bockting</surname>
              <given-names>CL</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT: five priorities for research</article-title>
          <source>Nature</source>
          <year>2023</year>
          <volume>614</volume>
          <issue>7947</issue>
          <fpage>224</fpage>
          <lpage>226</lpage>
          <pub-id pub-id-type="doi">10.1038/d41586-023-00288-7</pub-id>
          <pub-id pub-id-type="medline">36737653</pub-id>
          <pub-id pub-id-type="pii">10.1038/d41586-023-00288-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <article-title>OHDSI vocabularies repository</article-title>
          <source>ATHENA</source>
          <access-date>2023-12-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://athena.ohdsi.org/search-terms/start">https://athena.ohdsi.org/search-terms/start</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cunningham</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Reeder</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Claggett</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Marti-Castellote</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Khurshid</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Batra</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lubitz</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Maddah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Philippakis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Desai</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Ellinor</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Vardeny</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Solomon</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>JE</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing for adjudication of heart failure in a multicenter clinical trial: a secondary analysis of a randomized clinical trial</article-title>
          <source>JAMA Cardiol</source>
          <year>2024</year>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>174</fpage>
          <lpage>181</lpage>
          <pub-id pub-id-type="doi">10.1001/jamacardio.2023.4859</pub-id>
          <pub-id pub-id-type="medline">37950744</pub-id>
          <pub-id pub-id-type="pii">2811977</pub-id>
          <pub-id pub-id-type="pmcid">PMC10640703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fontoura</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cull</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Beaulieu-Jones</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Kovacheva</surname>
              <given-names>VP</given-names>
            </name>
          </person-group>
          <article-title>Zero-shot interpretable phenotyping of postpartum hemorrhage using large language models</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>212</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nature.com/articles/s41746-023-00957-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00957-x</pub-id>
          <pub-id pub-id-type="medline">38036723</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00957-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC10689487</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taloni</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Borselli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Scarsi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Coco</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Scorcia</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Giannaccare</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Comparative performance of humans versus GPT-4.0 and GPT-3.5 in the self-assessment program of American Academy of Ophthalmology</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>18562</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nature.com/articles/s41598-023-45837-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-023-45837-2</pub-id>
          <pub-id pub-id-type="medline">37899405</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-45837-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC10613606</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosoł</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gąsior</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Łaba</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Korzeniewski</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Młyńczak</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of the performance of GPT-3.5 and GPT-4 on the Polish medical final examination</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>20512</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nature.com/articles/s41598-023-46995-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-023-46995-z</pub-id>
          <pub-id pub-id-type="medline">37993519</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-46995-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC10665355</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Latif</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Fine-tuning ChatGPT for automatic scoring</article-title>
          <source>ArXiv. Preprint posted online on October 16, 2023</source>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv231010072L"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Akbas</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Peetoom</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Villouta-Reyes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cerono</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rizk-Jackson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Israni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Baranzini</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>Biomedical knowledge graph-enhanced prompt generation for large language models</article-title>
          <source>ArXiv. Preprint posted online on November 29, 2023</source>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv231117330S"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sudlow</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gallacher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Beral</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Burton</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Danesh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Downey</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Elliott</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Landray</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Matthews</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Silman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sprosen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Peakman</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>UK biobank: an open access resource for identifying the causes of a wide range of complex diseases of middle and old age</article-title>
          <source>PLoS Med</source>
          <year>2015</year>
          <month>03</month>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>e1001779</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1001779"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1001779</pub-id>
          <pub-id pub-id-type="medline">25826379</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-12-02351</pub-id>
          <pub-id pub-id-type="pmcid">PMC4380465</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AEW</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LWH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <month>05</month>
          <day>24</day>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/sdata.2016.35"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
