<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v27i1e65317</article-id>
      <article-id pub-id-type="pmid">40354109</article-id>
      <article-id pub-id-type="doi">10.2196/65317</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Evaluation and Bias Analysis of Large Language Models in Generating Synthetic Electronic Health Records: Comparative Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Schwartz</surname>
            <given-names>Amy</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Bhattacharya</surname>
            <given-names>Ayush</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chrimes</surname>
            <given-names>Dillon</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Ruochen</given-names>
          </name>
          <degrees>DD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-5297-2970</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Honghan</given-names>
          </name>
          <degrees>DD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0213-5668</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Yuan</surname>
            <given-names>Yuhan</given-names>
          </name>
          <degrees>BD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-9968-8587</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Yifan</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-4505-9341</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Qian</surname>
            <given-names>Hao</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-1932-2560</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Changwei</given-names>
          </name>
          <degrees>DD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6155-4205</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Wei</surname>
            <given-names>Xin</given-names>
          </name>
          <degrees>DD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6183-2298</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>Shan</given-names>
          </name>
          <degrees>DD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-3088-5070</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Xin</given-names>
          </name>
          <degrees>DD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0047-3134</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Kan</surname>
            <given-names>Jingbao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8146-4605</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wan</surname>
            <given-names>Cheng</given-names>
          </name>
          <degrees>DD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2981-5970</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Yun</given-names>
          </name>
          <degrees>DD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <address>
            <institution>The First Affiliated Hospital of Nanjing Medical University</institution>
            <addr-line>300 Guangzhou Road</addr-line>
            <addr-line>Nanjing, 210036</addr-line>
            <country>China</country>
            <phone>86 83714511</phone>
            <email>yun_liu@njmu.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4311-3772</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Biomedical Engineering and Informatics</institution>
        <institution>Nanjing Medical University</institution>
        <addr-line>Nanjing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>University College London</institution>
        <addr-line>London</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>The Pervasive Communication Center</institution>
        <institution>Purple Mountain Laboratories</institution>
        <addr-line>Nanjing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Nanjing University of Posts and Telecommunications</institution>
        <addr-line>Nanjing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>The First Affiliated Hospital of Nanjing Medical University</institution>
        <addr-line>Nanjing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Yun Liu <email>yun_liu@njmu.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>12</day>
        <month>5</month>
        <year>2025</year>
      </pub-date>
      <volume>27</volume>
      <elocation-id>e65317</elocation-id>
      <history>
        <date date-type="received">
          <day>1</day>
          <month>9</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>11</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>4</day>
          <month>2</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>29</day>
          <month>3</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Ruochen Huang, Honghan Wu, Yuhan Yuan, Yifan Xu, Hao Qian, Changwei Zhang, Xin Wei, Shan Lu, Xin Zhang, Jingbao Kan, Cheng Wan, Yun Liu. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 12.05.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2025/1/e65317" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Synthetic electronic health records (EHRs) generated by large language models (LLMs) offer potential for clinical education and model training while addressing privacy concerns. However, performance variations and demographic biases in these models remain underexplored, posing risks to equitable health care.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to systematically assess the performance of various LLMs in generating synthetic EHRs and to critically evaluate the presence of gender and racial biases in the generated outputs. We focused on assessing the completeness and representativeness of these EHRs across 20 diseases with varying demographic prevalence.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A framework was developed to generate 140,000 synthetic EHRs using 10 standardized prompts across 7 LLMs. The electronic health record performance score (EPS) was introduced to quantify completeness, while the statistical parity difference (SPD) was proposed to assess the degree and direction of demographic bias. Chi-square tests were used to evaluate the presence of bias across demographic groups.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Larger models exhibited superior performance but heightened biases. The Yi-34B achieved the highest EPS (96.8), while smaller models (Qwen-1.8B: EPS=63.35) underperformed. Sex polarization emerged: female-dominated diseases (eg, multiple sclerosis) saw amplified female representation in outputs (Qwen-14B: 973/1000, 97.3% female vs 564,424/744,778, 75.78% real; SPD=+21.50%), while balanced diseases and male-dominated diseases skewed toward the male group (eg, hypertension, Llama2-13B: 957/1000, 95.7% male vs 79,540,040/152,466,669, 52.17% real; SPD=+43.50%). Racial bias patterns revealed that some models overestimated the representation of White (eg, Yi-6B: mean SPD +14.40%, SD 16.22%) or Black groups (eg, Yi-34B: mean SPD +14.90%, SD 27.16%), while most models systematically underestimated the representation of Hispanic (average SPD across 7 models is –11.93%, SD 8.36%) and Asian groups (average SPD across 7 models is –0.77%, SD 11.99%).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Larger models, such as Yi-34B, Qwen-14B, and Llama2-13B, showed improved performance in generating more comprehensive EHRs, as reflected in higher EPS values. However, this increased performance was accompanied by a notable escalation in both gender and racial biases, highlighting a performance-bias trade-off. The study identified 4 key findings as follows: (1) as model size increased, EHR generation improved, but demographic biases also became more pronounced; (2) biases were observed across all models, not just the larger ones; (3) gender bias closely aligned with real-world disease prevalence, while racial bias was evident in only a subset of diseases; and (4) racial biases varied, with some diseases showing overrepresentation of White or Black populations and underrepresentation of Hispanic and Asian groups. These findings underline the need for effective bias mitigation strategies and the development of benchmarks to ensure fairness in artificial intelligence applications for health care.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>large language models</kwd>
        <kwd>electronic health records</kwd>
        <kwd>gender bias</kwd>
        <kwd>racial bias</kwd>
        <kwd>performance evaluation</kwd>
        <kwd>artificial intelligence</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background and Objectives</title>
        <p>The integration of artificial intelligence (AI) in health care has opened up new possibilities, especially with the advent of large language models (LLMs) capable of generating synthetic electronic health records (EHRs) [<xref ref-type="bibr" rid="ref1">1</xref>]. These synthetic records hold significant promise for clinical education and model training, offering a way to mitigate privacy concerns while still providing realistic, diverse patient data for medical research and training purposes [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. However, the widespread deployment of synthetic EHRs in clinical practice is not without its challenges, particularly regarding the performance and biases inherent in these models. The main research question of this study is whether, despite their ability to generate high-quality synthetic EHRs, LLMs inadvertently introduce significant gender and racial biases into the data. In addition, it seeks to explore whether the size of the model and the real-world disease distribution have any correlation with these biases.</p>
        <p>This study aimed to evaluate the performance of multiple LLMs in generating synthetic EHRs, with a focus on the completeness and demographic representativeness of the generated records. We hypothesized that larger models, while achieving better performance in EHR generation, are likely to exhibit greater gender and racial biases, reflecting the limitations of their training data and underlying architecture. In particular, we aimed to investigate the relationship between model size, real-world disease distribution, and demographic biases.</p>
      </sec>
      <sec>
        <title>Literature Review and Research Gap</title>
        <p>Previous research has explored the potential of synthetic EHRs for clinical education, highlighting their role in training medical professionals and improving health care delivery by offering access to large, diverse patient datasets [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. However, little attention has been paid to the biases that these models may perpetuate, especially regarding gender and race [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. These biases threaten not only the fairness and accuracy of case analyses but also risk exacerbating existing societal disparities [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. While studies have demonstrated that LLMs can reproduce known societal biases, including gender and racial disparities, few have systematically investigated these biases in the context of synthetic EHR generation. For example, research has shown that models like GPT-4 often exhibit gender biases, such as overrepresenting male patients in certain medical scenarios, despite the real-world prevalence of conditions like HIV and multiple sclerosis, which have gender-specific distribution patterns [<xref ref-type="bibr" rid="ref19">19</xref>]. This research sought to fill this gap by systematically analyzing the performance and biases of synthetic EHRs generated by LLMs across a range of diseases with varying gender and racial prevalence. Specifically, we expanded upon existing literature by examining the following 3 key aspects: (1) whether racial and gender biases are widespread in synthetic EHRs generated by LLMs; (2) the impact of model size on racial and gender biases within the synthetic EHRs; and (3) how the real-world distribution of diseases influences the racial and gender biases present in these synthetic records. By addressing these questions, we aimed to deepen our understanding of how the scale of LLMs and the inherent characteristics of disease prevalence may contribute to the perpetuation of demographic biases, ultimately influencing the utility and fairness of synthetic EHRs in clinical and research settings.</p>
      </sec>
      <sec>
        <title>Significance and Practical Implications</title>
        <p>The practical implications of this work are significant. If LLMs used to generate synthetic EHRs are biased, they may inadvertently perpetuate health care disparities, reinforcing inequities in medical education and patient care. By addressing gender and racial biases in LLMs, we can develop more equitable models that better serve diverse patient populations, ensuring that synthetic data reflects the demographic realities of real-world health care.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Model Selection and Setup</title>
        <p>To evaluate the performance and biases of LLMs in generating synthetic EHRs, we used a framework illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>. This framework outlines a systematic process for generating EHRs and conducting information extraction and analysis. The process is divided into 3 key modules: prompt generation, EHR generation, and information extraction and analysis, as described in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
        <p>The 7 open-source LLMs selected for this study were chosen based on the considerations provided in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>.</p>
        <p>By incorporating models with diverse linguistic capabilities, parameter sizes, and recognition in the open-source community, this study provides a comprehensive evaluation of LLM performance and biases in synthetic EHR generation. A detailed summary of the selected models, including their publishers, primary languages, sizes, and benchmark performances, is presented in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Framework for evaluating the performance and bias of large language models (LLMs) in generating synthetic electronic health records (EHRs).</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e65317_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <boxed-text id="box1" position="float">
          <title>Systematic process for generating electronic health records (EHRs) and conducting information extraction and analysis.</title>
          <p>
            <bold>Prompt generation</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>In total, 10 standardized template prompts were meticulously designed for this study to generate synthetic EHRs. Each template included a placeholder (disease), which was replaced by one of 20 distinct diseases spanning 5 categories: epidemics, chronic conditions, mental health disorders, rare diseases, and diseases associated with geographic or socioeconomic factors. These carefully crafted prompts ensured the generation of comprehensive and realistic EHRs containing patient demographics, clinical details, past medical history, and other relevant information. For detailed information on prompt construction and the selected diseases, please refer to <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref39">39</xref>].</p>
            </list-item>
          </list>
          <p>
            <bold>EHR generation</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>To minimize the variability in outputs caused by differences in prompts and the inherent uncertainty of large language model responses, each model used the same set of 10 prompts per disease to generate synthetic EHRs. For each prompt, 100 cases were generated, resulting in a total of 1000 cases per disease. Across 20 diseases, each model generated 20,000 synthetic EHRs. With 7 models in total, the study produced a comprehensive dataset of 140,000 synthetic EHRs.</p>
            </list-item>
          </list>
          <p>
            <bold>Information extraction and analysis</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Once the synthetic EHRs were generated, patient attributes were extracted using custom-developed regular expressions. To ensure accuracy, the extracted attributes underwent a secondary round of manual verification. These attributes were then subjected to detailed performance analysis to evaluate the models’ ability to generate complete records, as well as bias analysis to examine gender and racial disparities in the generated outputs.</p>
            </list-item>
          </list>
        </boxed-text>
        <boxed-text id="box2" position="float">
          <title>Considerations for the selection of open-source large language models.</title>
          <p>
            <bold>Language capabilities reflecting diverse training corpora and cultural contexts</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>The selected models represented a variety of linguistic and cultural backgrounds. Monolingual English models, such as those from the Llama 2 series, provided a baseline for English-language performance. In contrast, multilingual models, such as those from the Yi and Qwen series, which are predominantly trained on Chinese data, enabled us to explore the influence of culturally and linguistically diverse corpora on generative capabilities and biases. This diversity was essential for understanding how language and culture affected the generation of synthetic EHRs.</p>
            </list-item>
          </list>
          <p>
            <bold>Availability of multiple model sizes</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>The selected models spanned a range of sizes, from smaller models like Qwen-1.8B to larger ones like Yi-34B. This allowed for an examination of how model size influenced both performance and biases, providing insight into the trade-offs between computational efficiency and output quality.</p>
            </list-item>
          </list>
          <p>
            <bold>Prominence in the open-source ecosystem</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>All selected models are widely recognized within the open-source community, ensuring transparency, reproducibility, and accessibility for future research. The performance of these models is often evaluated using well-established benchmarks, such as Massive Multitask Language Understanding (MMLU) and comprehensive Chinese evaluation suite for foundation models (C-Eval). MMLU scores, commonly referenced in English-speaking academic circles [<xref ref-type="bibr" rid="ref40">40</xref>], and C-Eval, a widely used metric in Chinese-speaking domains [<xref ref-type="bibr" rid="ref41">41</xref>], provide critical indicators of model performance.</p>
            </list-item>
          </list>
        </boxed-text>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Large language models’ information.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="160"/>
            <col width="210"/>
            <col width="200"/>
            <col width="150"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Publisher</td>
                <td>Primary language</td>
                <td>Model size (billion)</td>
                <td>MMLU<sup>a</sup> score</td>
                <td>C-Eval<sup>b</sup> score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Yi-6B</td>
                <td>01.AI</td>
                <td>English and Chinese</td>
                <td>6B</td>
                <td>64.11</td>
                <td>72</td>
              </tr>
              <tr valign="top">
                <td>Yi-34B</td>
                <td>01.AI</td>
                <td>English and Chinese</td>
                <td>34B</td>
                <td>79.4</td>
                <td>81.4</td>
              </tr>
              <tr valign="top">
                <td>Qwen-1.8B</td>
                <td>Alibaba Group</td>
                <td>English and Chinese</td>
                <td>1.8B</td>
                <td>45.3</td>
                <td>56.1</td>
              </tr>
              <tr valign="top">
                <td>Qwen-7B</td>
                <td>Alibaba Group</td>
                <td>English and Chinese</td>
                <td>7B</td>
                <td>58.2</td>
                <td>63.5</td>
              </tr>
              <tr valign="top">
                <td>Qwen-14B</td>
                <td>Alibaba Group</td>
                <td>English and Chinese</td>
                <td>14B</td>
                <td>66.3</td>
                <td>72.1</td>
              </tr>
              <tr valign="top">
                <td>Llama2-7B</td>
                <td>META</td>
                <td>English</td>
                <td>7B</td>
                <td>45.3</td>
                <td>—<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>Llama2-13B</td>
                <td>META</td>
                <td>English</td>
                <td>13B</td>
                <td>54.8</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>MMLU: Massive Multitask Language Understanding.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>C-Eval: comprehensive Chinese evaluation suite for foundation models.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Llama2-7B and Llama2-13B have not released their C-Eval test results, and no related records are found in the C-Eval rankings.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The detailed explanation of the parameters in <xref ref-type="table" rid="table1">Table 1</xref> is provided in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref>.</p>
        <p>To support the operation of these models, we deployed 4 high-performance NVIDIA 3090 graphics cards and 2 A800 graphics cards and used the Python programming language (Python Software Foundation) to call the LLMs and obtain the generated results.</p>
        <boxed-text id="box3" position="float">
          <title>Detailed explanation of the large language model parameters.</title>
          <p>
            <bold>Primary language</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>This indicates the main languages that the model is optimized for. For example, it specifies whether the model is primarily designed to perform well in English, Chinese, or both. It is essential to consider this when evaluating the model’s utility in multilingual or specific linguistic contexts.</p>
            </list-item>
          </list>
          <p>
            <bold>Model size (billion)</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>This refers to the number of parameters (in billions) within the large language model. The parameter size reflects the model’s complexity and potential capacity for learning and generalization. Larger models typically perform better but require more computational resources.</p>
            </list-item>
          </list>
          <p>
            <bold>Massive Multitask Language Understanding (MMLU) score</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>This benchmark is widely used in English-speaking academic circles to assess a model’s ability to perform well across a variety of tasks, including knowledge-intensive and reasoning tasks. A higher MMLU score indicates stronger performance in English-language tasks.</p>
            </list-item>
          </list>
          <p>
            <bold>Comprehensive Chinese evaluation suite for foundation models (C-Eval) score</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>This benchmark is designed to evaluate a model’s performance in Chinese tasks. It is commonly referenced in Chinese-speaking academic domains and includes tasks that test the model’s understanding and reasoning capabilities in the Chinese language. A higher C-Eval score reflects better proficiency in Chinese.</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study was designed and conducted with strict adherence to ethical standards, without involving any human subjects, patient information, medical records, or observations of public behaviors. All data utilized in this research were synthetic and generated computationally, thereby eliminating the necessity for informed consent, privacy and confidentiality measures, or participant compensation. Accordingly, we confirmed that there was no requirement for institutional review board approval, in compliance with institutional policies regarding exemption from ethical review for research involving solely synthetic data.</p>
      </sec>
      <sec>
        <title>Model Performance Evaluation</title>
        <p>Synthetic EHR generation involves creating records that comprehensively cover key medical attributes (eg, sex, age, and medical history) to ensure their clinical relevance and usability. However, the generated outputs can vary in quality and completeness, leading to 3 distinct categories as follows:</p>
        <list list-type="order">
          <list-item>
            <p>Irrelevant Content (<italic>N<sub>i</sub></italic>): Records that fail to align with the required medical attributes, often due to hallucinations, such as fabricating unrelated information or omitting critical details. These outputs are entirely unsuitable for clinical use.</p>
          </list-item>
          <list-item>
            <p>Partially Relevant Content (<italic>N<sub>s</sub></italic>): Records that include some, but not all, of the specified attributes. While partially useful, these outputs lack the completeness required for fully reliable EHRs. A subset of these records (<italic>N<sub>s-i</sub></italic>) contains the specific attribute <italic>A<sub>i</sub></italic> but still does not meet the full requirements.</p>
          </list-item>
          <list-item>
            <p>Fully Relevant Content (<italic>N<sub>a</sub></italic>): Records that accurately and completely include all specified attributes, representing the ideal output for synthetic EHR generation.</p>
          </list-item>
        </list>
        <p>The total number of synthetic EHRs <italic>N</italic> produced is the sum of these scenarios:</p>
        <disp-formula>N=N<sub>i</sub> + N<sub>s</sub> + N<sub>a</sub> <bold>(1)</bold></disp-formula>
        <p>To evaluate the performance of the EHR generation task in LLMs, we used 2 metrics: the electronic health record performance score (EPS) and the attribute-specific EPS (EPS<sub>i</sub>).</p>
        <disp-formula>EPS = N<sub>a</sub>/N <bold>(2)</bold></disp-formula>
        <p>EPS measures the proportion of fully complete and accurate EHRs among all generated records. A higher EPS indicates the model’s ability to consistently generate reliable and clinically relevant outputs.</p>
        <disp-formula>EPS<sub>i</sub> = (N<sub>s-i</sub> + N<sub>a</sub>)/N <bold>(3)</bold></disp-formula>
        <p>EPS<sub>i</sub> assesses the model’s capability to generate records that include a specific attribute A<sub>i</sub>. This metric provides a more granular evaluation, helping to identify which attributes the model can reliably generate and which may require further optimization.</p>
        <p>Due to constraints in scope and length, this study specifically examined the generative capabilities of LLMs concerning gender and racial attributes, hence defining <italic>A</italic> = {<italic>A<sub>gender</sub></italic>, <italic>A<sub>race</sub></italic>}. To assess the capabilities of LLMs in generating synthetic EHRs, we used the EPS, EPS<italic><sub>gender</sub></italic>, and EPS<italic><sub>race</sub></italic>.</p>
      </sec>
      <sec>
        <title>Model Bias Evaluation</title>
        <p>In this study, gender bias and racial bias are systematically defined and analyzed to assess their presence in synthetic EHRs generated by LLMs.</p>
        <p>Gender bias is defined as the deviation in the distribution of male and female cases generated by LLMs compared to real-world gender prevalence for specific diseases. Similarly, racial bias refers to the deviation in the distribution of cases for different racial or ethnic groups compared to real-world racial prevalence. Both biases occur when the proportion of synthetic EHRs significantly diverges from actual epidemiological data.</p>
        <p>To detect such discrepancies, chi-square tests were used, with a <italic>P</italic> value &lt;.05 indicating statistically significant bias. Real-world prevalence data for 20 diseases in the United States, obtained through an exhaustive literature review [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref39">39</xref>], were used as benchmarks for evaluation.</p>
        <p>When significant discrepancies were identified, statistical parity difference (SPD) was used to quantify bias and classify groups as either:</p>
        <list list-type="bullet">
          <list-item>
            <p>Overrepresented: SPD&gt;+10%, where the generated proportion significantly exceeds the real-world prevalence.</p>
          </list-item>
          <list-item>
            <p>Underrepresented: SPD&lt;–10%, where the generated proportion falls significantly below the real-world prevalence.</p>
          </list-item>
        </list>
        <p>SPD is calculated as:</p>
        <disp-formula>SPD = P<sub>generated</sub> – P<sub>real</sub> <bold>(4)</bold></disp-formula>
        <p>Where <italic>P<sub>generated</sub></italic> is the proportion of cases generated for a group, and <italic>P<sub>real</sub></italic> is the real-world prevalence.</p>
        <p>We further introduced the gender polarization effect and racial polarization effect to describe the tendency of LLMs to increasingly favor one gender or racial group as model size increases. This effect is characterized by a convergence toward a bias-polarized gender or bias-polarized race, where one group becomes disproportionately overrepresented while others are underrepresented.</p>
        <p>For example, in diseases with balanced sex distributions (eg, hypertension: 72,926,629/152,466,669, 47.83% female vs 79,540,040/152,466,669, 52.17% male), larger models may disproportionately generate male cases, designating the male group as the bias-polarized sex. Similarly, in female-dominated diseases like lupus (29,578/33,145, 89.24% female vs 3567/33,145, 10.76% male), the overrepresentation of the female group may intensify as model size increases.</p>
        <p>A similar pattern is observed for racial bias. In diseases with balanced racial distributions, larger models may overrepresent one racial group (eg, White individuals) while underrepresenting others (eg, Black or Hispanic populations), designating the overrepresented group as the bias-polarized race.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Model Performance</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> presents the performance metrics across the different models, quantitatively assessing each model’s capability to generate synthetic EHRs.</p>
        <p>The analysis of EPS scores across models indicated a clear correlation between the size of the models and their effectiveness in generating synthetic EHRs. Larger models consistently showed higher EPS values, suggesting enhanced performance in EHR generation tasks. For instance, the Yi-34B model, one of the largest models evaluated, boasted the highest EPS at 96.8, followed closely by its EPS<italic><sub>race</sub></italic> at 96.84 and EPS<italic><sub>gender</sub></italic> at 98.82. This was significantly higher compared to the smallest model, Qwen-1.8B, which scored only 63.35 in EPS, 65.05 in EPS<italic><sub>race</sub></italic>, and 83.31 in EPS<italic><sub>gender</sub></italic>. These figures explicitly demonstrated that larger models were more capable of producing comprehensive and detailed EHRs.</p>
        <p>The EPS, along with its race and gender derivatives, offered detailed insights into each model’s precision in generating specific metrics within synthetic EHRs. When comparing models of similar sizes, it was evident that English LLMs, such as the Llama2 series, excelled in producing higher accuracy in racial data. The Llama2-7B, for instance, scored a 92.9 in EPS<italic><sub>race</sub></italic>, outperforming similarly sized models like Qwen-7B and Yi-6B, which recorded EPS<italic><sub>race</sub></italic> scores of 91.1 and 78.12, respectively. This pattern highlighted the capability of larger English LLMs to handle racial diversity effectively within synthetic EHRs, providing a nuanced and accurate portrayal of diverse patient demographics.</p>
        <p>Traditional metrics, such as Massive Multitask Language Understanding (MMLU) and comprehensive Chinese evaluation suite for foundation models (C-Eval), were typically used to assess the general cognitive capabilities of models. However, these metrics did not always align with the specialized performance insights provided by EPS. For example, despite having a moderate MMLU score of 54.8, the Llama2-13B achieved a significantly higher EPS of 93.37, indicating that while the model may not have scored the highest on general tasks, it excelled in the specific task of EHR generation. Similarly, the Yi-6B model had a relatively high MMLU score of 64.11 but an EPS of only 77.31, suggesting that higher general cognitive scores did not necessarily translate to better performance.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Performance metrics across different models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="220"/>
            <col width="90"/>
            <col width="150"/>
            <col width="170"/>
            <col width="110"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Model size (billion)</td>
                <td>EPS<sup>a</sup></td>
                <td>Race(EPS)<sup>b</sup></td>
                <td>Gender(EPS)<sup>c</sup></td>
                <td>MMLU<sup>d</sup></td>
                <td>C-Eval<sup>e</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Yi-34B</td>
                <td>34</td>
                <td>96.8</td>
                <td>96.84</td>
                <td>98.82</td>
                <td>74.9</td>
                <td>81.4</td>
              </tr>
              <tr valign="top">
                <td>Llama2-13B</td>
                <td>13</td>
                <td>93.37</td>
                <td>93.48</td>
                <td>94.63</td>
                <td>54.8</td>
                <td>—<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td>Llama2-7B</td>
                <td>7</td>
                <td>92.72</td>
                <td>92.9</td>
                <td>93.56</td>
                <td>45.3</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Qwen-7B</td>
                <td>7</td>
                <td>90.93</td>
                <td>91.1</td>
                <td>97.29</td>
                <td>58.2</td>
                <td>63.5</td>
              </tr>
              <tr valign="top">
                <td>Qwen-14B</td>
                <td>14</td>
                <td>88.81</td>
                <td>88.82</td>
                <td>98.99</td>
                <td>66.3</td>
                <td>72.1</td>
              </tr>
              <tr valign="top">
                <td>Yi-6B</td>
                <td>6</td>
                <td>77.31</td>
                <td>78.12</td>
                <td>85.37</td>
                <td>64.11</td>
                <td>72</td>
              </tr>
              <tr valign="top">
                <td>Qwen-1.8B</td>
                <td>1.8</td>
                <td>63.35</td>
                <td>65.05</td>
                <td>83.31</td>
                <td>45.3</td>
                <td>56.1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>EPS: electronic health record performance score.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Race(EPS):EPS<sub>race</sub>.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>Gender(EPS):EPS<sub>gender</sub>.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>MMLU: Massive Multitask Language Understanding.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>C-Eval: comprehensive Chinese evaluation suite for foundation models.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>Llama2-7B and Llama2-13B have not released their C-Eval test results, and no related records are found in the C-Eval rankings.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Gender Bias</title>
        <p>Gender bias was found to be pervasive in all models, regardless of their size. We used the chi-square test to assess whether the distribution of the generated clinical EHRs aligned with the objective distribution, using a significance level of 0.05. The detailed results, including <italic>P</italic> values, chi-square values, and <italic>df</italic>, are presented in Tables S1-S7 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> (chi-square analysis results), with the key findings summarized in <xref ref-type="table" rid="table3">Table 3</xref>. In total, 17 diseases exhibited significant differences across the 5 models, while 14 diseases demonstrated significant differences across the 7 models (<italic>P</italic>&lt;.05) as shown in <xref ref-type="table" rid="table3">Table 3</xref>. Only 2 gender-related diseases did not show significant differences.</p>
        <p>Furthermore, a comparison of <xref ref-type="table" rid="table3">Table 3</xref> and the EPS values revealed that the Yi-6B and Qwen-1.8B models exhibited fewer biases in the generated cases. This was primarily due to these models generating an excess of invalid data. Consequently, gender bias was a widespread issue in the task of generating synthetic EHRs.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Gender bias results of 7 large language models for 20 diseases.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="80"/>
            <col width="250"/>
            <col width="550"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>EPS<sup>a</sup></td>
                <td>Number of diseases with gender bias</td>
                <td>Diseases without gender bias</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Llama2-7B</td>
                <td>92.72</td>
                <td>18</td>
                <td>Preeclampsia and prostate cancer</td>
              </tr>
              <tr valign="top">
                <td>Llama2-13B</td>
                <td>93.37</td>
                <td>18</td>
                <td>Preeclampsia and prostate cancer</td>
              </tr>
              <tr valign="top">
                <td>Qwen-14B</td>
                <td>88.81</td>
                <td>18</td>
                <td>Preeclampsia and prostate cancer</td>
              </tr>
              <tr valign="top">
                <td>Qwen-7B</td>
                <td>90.93</td>
                <td>18</td>
                <td>Preeclampsia and prostate cancer</td>
              </tr>
              <tr valign="top">
                <td>Yi-34B</td>
                <td>96.8</td>
                <td>17</td>
                <td>Preeclampsia, prostate cancer, and multiple sclerosis</td>
              </tr>
              <tr valign="top">
                <td>Yi-6B</td>
                <td>77.31</td>
                <td>14</td>
                <td>Preeclampsia, prostate cancer, major depressive disorder, sarcoidosis, rheumatoid arthritis, and takotsubo cardiomyopathy</td>
              </tr>
              <tr valign="top">
                <td>Qwen-1.8B</td>
                <td>63.35</td>
                <td>14</td>
                <td>Preeclampsia, prostate cancer, colon cancer, hypertension, lupus, and sarcoidosis</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>EPS: electronic health record performance score.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>As model size increased, gender bias became more pronounced. This trend was observed across multiple diseases, with larger models showing more extreme gender imbalances when compared to the real-world gender distributions as shown in <xref ref-type="table" rid="table4">Tables 4</xref>-<xref ref-type="table" rid="table6">6</xref>. Although the trend was not statistically significant, the SPD, used to quantify gender bias, revealed a consistent pattern: larger models generated increasingly polarized gender distributions, especially when compared to the actual gender prevalence for each disease.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Gender bias distribution in Qwen-1.8B (1.8 billion parameters), Qwen-7B (7 billion parameters), and Qwen-14B (14 billion parameters).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="130"/>
            <col width="140"/>
            <col width="100"/>
            <col width="140"/>
            <col width="100"/>
            <col width="140"/>
            <col width="100"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Diseases and gender</td>
                <td colspan="2">Qwen-1.8B</td>
                <td colspan="2">Qwen-7B</td>
                <td colspan="2">Qwen-14B</td>
                <td>Actual ratio (%)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Ratio (n=1000, %)</td>
                <td>SPD<sup>a</sup> (%)</td>
                <td>Ratio (n=1000, %)</td>
                <td>SPD (%)</td>
                <td>Ratio (n=1000, %)</td>
                <td>SPD (%)</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="9">
                  <bold>Lupus</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>15.3</td>
                <td>—<sup>b</sup></td>
                <td>3.3</td>
                <td>—</td>
                <td>1.6</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>7.5</td>
                <td>–3.2</td>
                <td>1.1</td>
                <td>–9.6</td>
                <td>0.5</td>
                <td>–10.2<sup>c</sup></td>
                <td>10.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>77.2</td>
                <td>–12.1<sup>c</sup></td>
                <td>95.6</td>
                <td>+6.3</td>
                <td>97.9</td>
                <td>+8.6</td>
                <td>89.3</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Takotsubo cardiomyopathy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>11.8</td>
                <td>—</td>
                <td>2.5</td>
                <td>—</td>
                <td>0.5</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>23.8</td>
                <td>+10.7<sup>c</sup></td>
                <td>0.4</td>
                <td>–12.7<sup>c</sup></td>
                <td>0.6</td>
                <td>–12.5<sup>c</sup></td>
                <td>13.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>64.4</td>
                <td>–22.5<sup>c</sup></td>
                <td>97.1</td>
                <td>+10.2<sup>c</sup></td>
                <td>98.9</td>
                <td>+12.0<sup>c</sup></td>
                <td>86.9</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Multiple sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>11.7</td>
                <td>—</td>
                <td>2.9</td>
                <td>—</td>
                <td>1.9</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>16.6</td>
                <td>–7.6</td>
                <td>5.3</td>
                <td>–18.9<sup>c</sup></td>
                <td>0.8</td>
                <td>–23.4<sup>c</sup></td>
                <td>24.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>71.7</td>
                <td>–4.1</td>
                <td>91.8</td>
                <td>+16.0<sup>c</sup></td>
                <td>97.3</td>
                <td>+21.5<sup>c</sup></td>
                <td>75.8</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Rheumatoid arthritis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>14.5</td>
                <td>—</td>
                <td>2.3</td>
                <td>—</td>
                <td>0.7</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>16.1</td>
                <td>–9.8</td>
                <td>1.4</td>
                <td>–24.5<sup>c</sup></td>
                <td>0.6</td>
                <td>–25.3<sup>c</sup></td>
                <td>25.9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>69.4</td>
                <td>–4.7</td>
                <td>96.3</td>
                <td>+22.2<sup>c</sup></td>
                <td>98.7</td>
                <td>+24.6<sup>c</sup></td>
                <td>74.1</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Major depressive disorder</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>9</td>
                <td>—</td>
                <td>2.9</td>
                <td>—</td>
                <td>0.9</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>24</td>
                <td>–12.7<sup>c</sup></td>
                <td>10.7</td>
                <td>–26.0<sup>c</sup></td>
                <td>0.6</td>
                <td>–36.1<sup>c</sup></td>
                <td>36.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>67</td>
                <td>+3.7</td>
                <td>86.4</td>
                <td>+23.1<sup>c</sup></td>
                <td>98.5</td>
                <td>+35.2<sup>c</sup></td>
                <td>63.3</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Huntington disease</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>10</td>
                <td>—</td>
                <td>1.1</td>
                <td>—</td>
                <td>0.7</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>62.1</td>
                <td>+12.1<sup>c</sup></td>
                <td>87.9</td>
                <td>+37.9<sup>c</sup></td>
                <td>75.1</td>
                <td>+25.1<sup>c</sup></td>
                <td>50</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>27.9</td>
                <td>–22.1<sup>c</sup></td>
                <td>11.0</td>
                <td>–39.0<sup>c</sup></td>
                <td>24.2</td>
                <td>–25.8<sup>c</sup></td>
                <td>50</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Bacterial pneumonia</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>14.2</td>
                <td>—</td>
                <td>2.4</td>
                <td>—</td>
                <td>1.1</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>28.7</td>
                <td>–22.3<sup>c</sup></td>
                <td>88.6</td>
                <td>+37.6<sup>c</sup></td>
                <td>97.8</td>
                <td>+46.8<sup>c</sup></td>
                <td>51</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>57.1</td>
                <td>+8.1</td>
                <td>9.0</td>
                <td>–40.0<sup>c</sup></td>
                <td>1.1</td>
                <td>–47.9<sup>c</sup></td>
                <td>49</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Hypertension</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>8.8</td>
                <td>—</td>
                <td>2.4</td>
                <td>—</td>
                <td>1.0</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>46.8</td>
                <td>–5.4</td>
                <td>92.6</td>
                <td>+40.4<sup>c</sup></td>
                <td>97.3</td>
                <td>+45.1<sup>c</sup></td>
                <td>52.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>44.4</td>
                <td>–3.4</td>
                <td>5</td>
                <td>–42.8<sup>c</sup></td>
                <td>1.7</td>
                <td>–46.1<sup>c</sup></td>
                <td>47.8</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Amyotrophic lateral sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>18.5</td>
                <td>—</td>
                <td>4.3</td>
                <td>—</td>
                <td>1.6</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>33.6</td>
                <td>–20.2<sup>c</sup></td>
                <td>80.5</td>
                <td>+26.7<sup>c</sup></td>
                <td>94.8</td>
                <td>+41.0<sup>c</sup></td>
                <td>53.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>47.9</td>
                <td>+1.7</td>
                <td>15.2</td>
                <td>–31.0<sup>c</sup></td>
                <td>3.6</td>
                <td>–42.6<sup>c</sup></td>
                <td>46.2</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>COVID-19</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>11.5</td>
                <td>—</td>
                <td>3</td>
                <td>—</td>
                <td>0.3</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>27.7</td>
                <td>–27.1<sup>c</sup></td>
                <td>79.4</td>
                <td>+24.6<sup>c</sup></td>
                <td>99.2</td>
                <td>+44.4<sup>c</sup></td>
                <td>54.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>60.8</td>
                <td>+15.6<sup>c</sup></td>
                <td>17.6</td>
                <td>–27.6<sup>c</sup></td>
                <td>0.5</td>
                <td>–44.7<sup>c</sup></td>
                <td>45.2</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Multiple myeloma</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>13.9</td>
                <td>—</td>
                <td>2</td>
                <td>—</td>
                <td>0.7</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>32.9</td>
                <td>–22.6<sup>c</sup></td>
                <td>94.9</td>
                <td>+39.4<sup>c</sup></td>
                <td>99.3</td>
                <td>+43.8<sup>c</sup></td>
                <td>55.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>53.2</td>
                <td>+8.7</td>
                <td>3.1</td>
                <td>–41.4<sup>c</sup></td>
                <td>0</td>
                <td>–44.5<sup>c</sup></td>
                <td>44.5</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Colon cancer</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>8.2</td>
                <td>—</td>
                <td>2.1</td>
                <td>—</td>
                <td>1.4</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>52.0</td>
                <td>–5.1</td>
                <td>93.4</td>
                <td>+36.3<sup>c</sup></td>
                <td>98.6</td>
                <td>+41.5<sup>c</sup></td>
                <td>57.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>39.8</td>
                <td>–3.1</td>
                <td>4.5</td>
                <td>–38.4<sup>c</sup></td>
                <td>0.0</td>
                <td>–42.9<sup>c</sup></td>
                <td>42.9</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Tricuspid endocarditis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>10.4</td>
                <td>—</td>
                <td>0.8</td>
                <td>—</td>
                <td>0.2</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>36.5</td>
                <td>–21.5<sup>c</sup></td>
                <td>92.4</td>
                <td>+34.4<sup>c</sup></td>
                <td>94.4</td>
                <td>+36.4<sup>c</sup></td>
                <td>58</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>53.1</td>
                <td>+11.1<sup>c</sup></td>
                <td>6.8</td>
                <td>–35.2<sup>c</sup></td>
                <td>5.4</td>
                <td>–36.6<sup>c</sup></td>
                <td>42</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Hepatitis B</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>15.3</td>
                <td>—</td>
                <td>3.3</td>
                <td>—</td>
                <td>0.9</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>36.1</td>
                <td>–23.7<sup>c</sup></td>
                <td>96.3</td>
                <td>+36.5<sup>c</sup></td>
                <td>99</td>
                <td>+39.2<sup>c</sup></td>
                <td>59.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>48.6</td>
                <td>+8.4</td>
                <td>0.4</td>
                <td>–39.8<sup>c</sup></td>
                <td>0.1</td>
                <td>–40.1<sup>c</sup></td>
                <td>40.2</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Tuberculosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>19.7</td>
                <td>—</td>
                <td>2.7</td>
                <td>—</td>
                <td>0.8</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>33.5</td>
                <td>–27.7<sup>c</sup></td>
                <td>82.0</td>
                <td>+20.8<sup>c</sup></td>
                <td>98.1</td>
                <td>+36.9<sup>c</sup></td>
                <td>61.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>46.8</td>
                <td>+8.0</td>
                <td>15.3</td>
                <td>–23.5<sup>c</sup></td>
                <td>1.1</td>
                <td>–37.7<sup>c</sup></td>
                <td>38.8</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Syphilis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>19.3</td>
                <td>—</td>
                <td>2.7</td>
                <td>—</td>
                <td>0.8</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>48.1</td>
                <td>–31.6<sup>c</sup></td>
                <td>97.1</td>
                <td>+17.4<sup>c</sup></td>
                <td>99.2</td>
                <td>+19.5<sup>c</sup></td>
                <td>79.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>32.6</td>
                <td>+12.3<sup>c</sup></td>
                <td>0.2</td>
                <td>–20.1<sup>c</sup></td>
                <td>0.0</td>
                <td>–20.3<sup>c</sup></td>
                <td>20.3</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>HIV</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>17.8</td>
                <td>—</td>
                <td>4.4</td>
                <td>—</td>
                <td>1.4</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>47.6</td>
                <td>–33.5<sup>c</sup></td>
                <td>95.3</td>
                <td>+14.2<sup>c</sup></td>
                <td>98.5</td>
                <td>+17.4<sup>c</sup></td>
                <td>81.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>34.6</td>
                <td>+15.7<sup>c</sup></td>
                <td>0.3</td>
                <td>–18.6<sup>c</sup></td>
                <td>0.1</td>
                <td>–18.8<sup>c</sup></td>
                <td>18.9</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>SPD: statistical parity difference.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>Not available.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>An |SPD| &gt;10% indicates a significant deviation from real-world prevalence: values &gt;+10% denote overrepresentation, and values &lt;–10% denote underrepresentation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Gender bias distribution in Llama 2-7B (7 billion parameters) and Llama 2-13B (13 billion parameters).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="170"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Diseases and gender</td>
                <td colspan="2">Llama 2-7B</td>
                <td colspan="2">Llama 2-13B</td>
                <td>Actual ratio (%)</td>
              </tr>
              <tr valign="top">
                <td colspan="2"/>
                <td>Ratio (n=1000, %)</td>
                <td>SPD<sup>a</sup> (%)</td>
                <td>Ratio (n=1000, %)</td>
                <td>SPD (%)</td>
                <td/>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="7">
                  <bold>Lupus</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>5.8</td>
                <td>—<sup>b</sup></td>
                <td>6.3</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>0.8</td>
                <td>–9.9</td>
                <td>0</td>
                <td>–10.7<sup>c</sup></td>
                <td>10.7</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>93.4</td>
                <td>+4.1</td>
                <td>93.7</td>
                <td>+4.4</td>
                <td>89.3</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Takotsubo cardiomyopathy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>0.2</td>
                <td>—</td>
                <td>0.3</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>25.4</td>
                <td>+12.3<sup>c</sup></td>
                <td>2.3</td>
                <td>–10.8<sup>c</sup></td>
                <td>13.1</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>74.4</td>
                <td>–12.5<sup>c</sup></td>
                <td>97.4</td>
                <td>+10.5<sup>c</sup></td>
                <td>86.9</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Multiple sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>10.4</td>
                <td>—</td>
                <td>8.6</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>7.8</td>
                <td>–16.4<sup>c</sup></td>
                <td>0.3</td>
                <td>–23.9<sup>c</sup></td>
                <td>24.2</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>81.8</td>
                <td>+6.0</td>
                <td>91.1</td>
                <td>+15.3<sup>c</sup></td>
                <td>75.8</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Rheumatoid arthritis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>3.8</td>
                <td>—</td>
                <td>0.7</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>11.9</td>
                <td>–14.0<sup>c</sup></td>
                <td>0.1</td>
                <td>–25.8<sup>c</sup></td>
                <td>25.9</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>84.3</td>
                <td>+9.9</td>
                <td>99.2</td>
                <td>+25.1<sup>c</sup></td>
                <td>74.1</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Major depressive disorder</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>10.2</td>
                <td>—</td>
                <td>7.1</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>45.1</td>
                <td>+8.4</td>
                <td>8.1</td>
                <td>–28.6<sup>c</sup></td>
                <td>36.7</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>44.7</td>
                <td>–18.6<sup>c</sup></td>
                <td>84.8</td>
                <td>+21.5<sup>c</sup></td>
                <td>63.3</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Huntington disease</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>0.3</td>
                <td>—</td>
                <td>1.2</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>78.9</td>
                <td>+28.9<sup>c</sup></td>
                <td>91.1</td>
                <td>+41.1<sup>c</sup></td>
                <td>50.0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>20.8</td>
                <td>–29.2<sup>c</sup></td>
                <td>7.7</td>
                <td>–42.3<sup>c</sup></td>
                <td>50.0</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Bacterial pneumonia</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>0</td>
                <td>—</td>
                <td>0.3</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>86.0</td>
                <td>+35.0<sup>c</sup></td>
                <td>94.2</td>
                <td>+43.2<sup>c</sup></td>
                <td>51.0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>14.0</td>
                <td>–35.0<sup>c</sup></td>
                <td>5.5</td>
                <td>–43.5<sup>c</sup></td>
                <td>49.0</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Hypertension</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>0.3</td>
                <td>—</td>
                <td>0.1</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>84.8</td>
                <td>+32.6<sup>c</sup></td>
                <td>95.7</td>
                <td>+43.5<sup>c</sup></td>
                <td>52.2</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>14.9</td>
                <td>–32.9<sup>c</sup></td>
                <td>4.2</td>
                <td>–42.8<sup>c</sup></td>
                <td>47.8</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Amyotrophic lateral sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>9.3</td>
                <td>—</td>
                <td>8.4</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>86.2</td>
                <td>+32.4<sup>c</sup></td>
                <td>87.8</td>
                <td>+34.0<sup>c</sup></td>
                <td>53.8</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>4.5</td>
                <td>–41.7<sup>c</sup></td>
                <td>3.8</td>
                <td>–42.4<sup>c</sup></td>
                <td>46.2</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>COVID-19</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>0.3</td>
                <td>—</td>
                <td>7.8</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>91.0</td>
                <td>+36.2<sup>c</sup></td>
                <td>92.1</td>
                <td>+37.3<sup>c</sup></td>
                <td>54.8</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>8.7</td>
                <td>–36.5<sup>c</sup></td>
                <td>0.1</td>
                <td>–45.1<sup>c</sup></td>
                <td>45.2</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Multiple myeloma</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>6.7</td>
                <td>—</td>
                <td>2.2</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>88.4</td>
                <td>+32.9<sup>c</sup></td>
                <td>97.4</td>
                <td>+41.9<sup>c</sup></td>
                <td>55.5</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>4.9</td>
                <td>–39.6<sup>c</sup></td>
                <td>0.4</td>
                <td>–44.1<sup>c</sup></td>
                <td>44.5</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Colon cancer</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>7</td>
                <td>—</td>
                <td>7.5</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>85.2</td>
                <td>+28.1<sup>c</sup></td>
                <td>91.8</td>
                <td>+34.7<sup>c</sup></td>
                <td>57.1</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>7.8</td>
                <td>–35.1<sup>c</sup></td>
                <td>0.7</td>
                <td>–42.2<sup>c</sup></td>
                <td>42.9</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Tricuspid endocarditis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>0.2</td>
                <td>—</td>
                <td>0.4</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>98.4</td>
                <td>+40.4<sup>c</sup></td>
                <td>89.1</td>
                <td>+31.1<sup>c</sup></td>
                <td>58</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>1.4</td>
                <td>–40.6<sup>c</sup></td>
                <td>10.5</td>
                <td>–31.5<sup>c</sup></td>
                <td>42</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Hepatitis B</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>5.9</td>
                <td>—</td>
                <td>4.9</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>86.1</td>
                <td>+26.3<sup>c</sup></td>
                <td>92.9</td>
                <td>+33.1<sup>c</sup></td>
                <td>59.8</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>8</td>
                <td>–32.2<sup>c</sup></td>
                <td>2.2</td>
                <td>–38.0<sup>c</sup></td>
                <td>40.2</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Tuberculosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>6.9</td>
                <td>—</td>
                <td>6.4</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>83.8</td>
                <td>+22.6<sup>c</sup></td>
                <td>93.1</td>
                <td>+31.9<sup>c</sup></td>
                <td>61.2</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>9.3</td>
                <td>–29.5<sup>c</sup></td>
                <td>0.5</td>
                <td>–38.3<sup>c</sup></td>
                <td>38.8</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Syphilis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>20.2</td>
                <td>—</td>
                <td>16.5</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>79.1</td>
                <td>–0.6</td>
                <td>83.5</td>
                <td>+3.8</td>
                <td>79.7</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>0.7</td>
                <td>–19.6<sup>c</sup></td>
                <td>0</td>
                <td>–20.3<sup>c</sup></td>
                <td>20.3</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>HIV</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Not available</td>
                <td>24.1</td>
                <td>—</td>
                <td>20.8</td>
                <td>—</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Male</td>
                <td>67.8</td>
                <td>–13.3<sup>c</sup></td>
                <td>78.9</td>
                <td>–2.2</td>
                <td>81.1</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Female</td>
                <td>8.1</td>
                <td>–10.8<sup>c</sup></td>
                <td>0.3</td>
                <td>–18.6<sup>c</sup></td>
                <td>18.9</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>SPD: statistical parity difference.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>Not available.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>An |SPD| &gt;10% indicates a significant deviation from real-world prevalence: values &gt;+10% denote overrepresentation, and values &lt;–10% denote underrepresentation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Gender bias distribution in Yi-6B (6 billion parameters) and Yi-34B (34 billion parameters).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="270"/>
            <col width="150"/>
            <col width="120"/>
            <col width="160"/>
            <col width="130"/>
            <col width="0"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Diseases and gender</td>
                <td colspan="2">Yi-6B</td>
                <td colspan="3">Yi-34B</td>
                <td>Actual ratio (%)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Ratio (n=1000, %)</td>
                <td>SPD<sup>a</sup> (%)</td>
                <td>Ratio (n=1000, %)</td>
                <td>SPD (%)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Lupus</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>13.5</td>
                <td>—<sup>b</sup></td>
                <td>0.7</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>5.0</td>
                <td>–5.7</td>
                <td>0.0</td>
                <td>–10.7<sup>c</sup></td>
                <td colspan="2">10.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>81.5</td>
                <td>–7.8</td>
                <td>99.3</td>
                <td>+10.0<sup>c</sup></td>
                <td colspan="2">89.3</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Takotsubo cardiomyopathy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>15.3</td>
                <td>—</td>
                <td>1.5</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>12.2</td>
                <td>–0.9</td>
                <td>0.0</td>
                <td>–13.1<sup>c</sup></td>
                <td colspan="2">13.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>72.5</td>
                <td>–14.4<sup>c</sup></td>
                <td>98.5</td>
                <td>+11.6<sup>c</sup></td>
                <td colspan="2">86.9</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Multiple sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>15.9</td>
                <td>—</td>
                <td>0.8</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>14.5</td>
                <td>–9.7</td>
                <td>21.5</td>
                <td>–2.7</td>
                <td colspan="2">24.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>69.6</td>
                <td>–6.2</td>
                <td>77.7</td>
                <td>+1.9</td>
                <td colspan="2">75.8</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Rheumatoid arthritis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>15.0</td>
                <td>—</td>
                <td>1.7</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>21.0</td>
                <td>–4.9</td>
                <td>1.3</td>
                <td>–24.6<sup>c</sup></td>
                <td colspan="2">25.9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>64.0</td>
                <td>–10.1<sup>c</sup></td>
                <td>97.0</td>
                <td>+22.9<sup>c</sup></td>
                <td colspan="2">74.1</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Major depressive disorder</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>13.4</td>
                <td>—</td>
                <td>2.7</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>30.0</td>
                <td>–6.7</td>
                <td>53.0</td>
                <td>+16.3<sup>c</sup></td>
                <td colspan="2">36.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>56.6</td>
                <td>–6.7</td>
                <td>44.3</td>
                <td>–19.0<sup>c</sup></td>
                <td colspan="2">63.3</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Huntington disease</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>14.6</td>
                <td>—</td>
                <td>2.3</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>68.9</td>
                <td>+18.9<sup>c</sup></td>
                <td>78.1</td>
                <td>+28.1<sup>c</sup></td>
                <td colspan="2">50.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>16.5</td>
                <td>–33.5<sup>c</sup></td>
                <td>19.6</td>
                <td>–30.4<sup>c</sup></td>
                <td colspan="2">50.0</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Bacterial pneumonia</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>14.3</td>
                <td>—</td>
                <td>1.1</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>74.8</td>
                <td>+23.8<sup>c</sup></td>
                <td>92.0</td>
                <td>+41.0<sup>c</sup></td>
                <td colspan="2">51.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>10.9</td>
                <td>–38.1<sup>c</sup></td>
                <td>6.9</td>
                <td>–42.1<sup>c</sup></td>
                <td colspan="2">49.0</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Hypertension</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>14.3</td>
                <td>—</td>
                <td>1.2</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>81.0</td>
                <td>+28.8<sup>c</sup></td>
                <td>93.8</td>
                <td>+41.6<sup>c</sup></td>
                <td colspan="2">52.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>4.7</td>
                <td>–43.1<sup>c</sup></td>
                <td>5.0</td>
                <td>–42.8<sup>c</sup></td>
                <td colspan="2">47.8</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Amyotrophic lateral sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>15.8</td>
                <td>—</td>
                <td>0.3</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>64.6</td>
                <td>+10.8<sup>c</sup></td>
                <td>85.7</td>
                <td>+31.9<sup>c</sup></td>
                <td colspan="2">53.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>19.6</td>
                <td>–26.6<sup>c</sup></td>
                <td>14.0</td>
                <td>–32.2<sup>c</sup></td>
                <td colspan="2">46.2</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>COVID-19</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>19.4</td>
                <td>—</td>
                <td>2.3</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>74.8</td>
                <td>+20.0<sup>c</sup></td>
                <td>96.2</td>
                <td>+41.4<sup>c</sup></td>
                <td colspan="2">54.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>5.8</td>
                <td>–39.4<sup>c</sup></td>
                <td>1.5</td>
                <td>–43.7<sup>c</sup></td>
                <td colspan="2">45.2</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Multiple myeloma</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>17.7</td>
                <td>—</td>
                <td>0.9</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>75.7</td>
                <td>+20.2<sup>c</sup></td>
                <td>92.9</td>
                <td>+37.4<sup>c</sup></td>
                <td colspan="2">55.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>6.6</td>
                <td>–37.9<sup>c</sup></td>
                <td>6.2</td>
                <td>–38.3<sup>c</sup></td>
                <td colspan="2">44.5</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Colon cancer</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>16.3</td>
                <td>—</td>
                <td>0.6</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>79.5</td>
                <td>+22.4<sup>c</sup></td>
                <td>96.5</td>
                <td>+39.4<sup>c</sup></td>
                <td colspan="2">57.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>4.2</td>
                <td>–38.7<sup>c</sup></td>
                <td>2.9</td>
                <td>–40.0<sup>c</sup></td>
                <td colspan="2">42.9</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Tricuspid endocarditis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>16.5</td>
                <td>—</td>
                <td>0.9</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>63.2</td>
                <td>+5.2</td>
                <td>95.6</td>
                <td>+37.6<sup>c</sup></td>
                <td colspan="2">58.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>20.3</td>
                <td>–21.7<sup>c</sup></td>
                <td>3.5</td>
                <td>–38.5<sup>c</sup></td>
                <td colspan="2">42.0</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Hepatitis B</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>12.7</td>
                <td>—</td>
                <td>1.2</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>79.9</td>
                <td>+20.1<sup>c</sup></td>
                <td>92.3</td>
                <td>+32.5<sup>c</sup></td>
                <td colspan="2">59.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>7.4</td>
                <td>–32.8<sup>c</sup></td>
                <td>6.5</td>
                <td>–33.7<sup>c</sup></td>
                <td colspan="2">40.2</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Tuberculosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>11.8</td>
                <td>—</td>
                <td>0.3</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>72.1</td>
                <td>+10.9<sup>c</sup></td>
                <td>89.4</td>
                <td>+28.2<sup>c</sup></td>
                <td colspan="2">61.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>16.1</td>
                <td>–22.7<sup>c</sup></td>
                <td>10.3</td>
                <td>–28.5<sup>c</sup></td>
                <td colspan="2">38.8</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Syphilis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>12.5</td>
                <td>—</td>
                <td>1.1</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>82.9</td>
                <td>+3.2</td>
                <td>97.4</td>
                <td>+17.7<sup>c</sup></td>
                <td colspan="2">79.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>4.6</td>
                <td>–15.7<sup>c</sup></td>
                <td>1.5</td>
                <td>–18.8<sup>c</sup></td>
                <td colspan="2">20.3</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>HIV</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available</td>
                <td>12.3</td>
                <td>—</td>
                <td>1.5</td>
                <td>—</td>
                <td colspan="2">0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>83.5</td>
                <td>+2.4</td>
                <td>94.4</td>
                <td>+13.3<sup>c</sup></td>
                <td colspan="2">81.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>4.2</td>
                <td>–14.7<sup>c</sup></td>
                <td>4.1</td>
                <td>–14.8<sup>c</sup></td>
                <td colspan="2">18.9</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>SPD: statistical parity difference.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>Not available.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>When |SPD| &gt;10%, it indicates a significant deviation from real-world prevalence: values &gt;+10% denote overrepresentation, and values &lt;–10% denote underrepresentation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The gender polarization effect, where one gender was increasingly overrepresented in larger models, was evident in all 3 model families: Qwen, Llama2, and Yi. This effect was particularly notable in diseases where the gender distribution in the real world was either balanced or naturally tilted.</p>
        <p>For instance, in lupus, where the real-world distribution was 89.24% (29578/33145) female and 10.76% (3567/33145) male, the small model (Qwen-1.8B) generated 772 (77.2%) out of 1000 EHR cases as female (SPD=–12.1%) and 75 (7.5%) out of 1000 EHR cases as male (SPD=–3.2%), underrepresenting males and undercorrecting for the dominant female population. As the model size increased, this bias toward the female group was amplified. The Qwen-14B model generated 979 (97.9%) out of 1000 EHR cases as female, further overrepresenting the female group (SPD=+8.6%) and exacerbating the already imbalanced gender distribution. Llama2-7B and Yi-6B showed similar trends, with Llama2-7B generating 934 (93.4%) out of 1000 EHR cases as female (SPD=+4.1%) and Yi-6B generating 815 (81.5%) out of 1000 EHR cases as female (SPD=–7.8%) in the smaller models. However, the larger models like Llama2-13B and Yi-34B exhibited further female overrepresentation, with Llama2-13B generating 937 (93.7%) out of 1000 EHR cases as female (SPD=+4.4%) and Yi-34B generating 993 (99.3%) out of 1000 EHR cases as female (SPD=+10%). This was an example of the gender polarization effect, where larger models disproportionately favored one gender, in this case, the female group.</p>
        <p>Similarly, hypertension, which had a real-world gender distribution of 52.17% (79,540,040/152,466,669) male and 47.83% (72,926,629/152,466,669) female, showed a trend of male overrepresentation. In the Qwen-1.8B model, 468 (46.8%) out of 1000 EHR cases were male (SPD=–5.4%) and 444 (44.4%) out of 1000 EHR cases were female (SPD=–3.4%). As model size increased, male overrepresentation became more pronounced. The Qwen-14B model generated 973 (97.3%) out of 1000 EHR cases as male (SPD=+45.1%) and 17 (1.7%) out of 1000 EHR cases as female (SPD=–46.1%). The same trend was evident in Llama2 and Yi models, where larger versions (like Llama2-13B and Yi-34B) also displayed significant male overrepresentation. Llama2-13B generated 957 (95.7%) out of 1000 EHR cases as male (SPD=+43.5%), and Yi-34B generated 938 (93.8%) out of 1000 EHR cases as male (SPD=+41.6%).</p>
        <p>The gender polarization effect was clearly present, demonstrating that as model size increases, one gender is often disproportionately favored, skewing the synthetic data away from the real-world gender balance.</p>
        <p>Although gender bias was observed across all models, its direction and severity were primarily shaped by the real-world gender distribution of the diseases. As shown in <xref ref-type="table" rid="table7">Table 7</xref>, diseases with a strong female bias, such as lupus (29,578/33,145, 89.24% female vs 3567/33,145, 10.76% male) and Takotsubo cardiomyopathy (83,807/97,650, 86.9% female vs 13,843/97,650, 13.1% male), showed a pronounced bias toward the female group, reflecting a bias-polarized gender. Conversely, for diseases in which the male group was predominantly affected, such as syphilis (52,865/66,289, 79.75% male vs 13,424/66,289, 20.25% female) and HIV (29,470/36,136, 81.03% male vs 6666/36,136, 18.97% female), the models favored the male group, again aligning with the bias-polarized gender effect.</p>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Analysis of the relationship between specific diseases and bias-polarized gender with true prevalence in the United States.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Disease</td>
                <td>Bias-polarized gender</td>
                <td>Actual female ratio (%)</td>
                <td>Actual male ratio (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Lupus</td>
                <td>Female</td>
                <td>89.3</td>
                <td>10.7</td>
              </tr>
              <tr valign="top">
                <td>Takotsubo cardiomyopathy</td>
                <td>Female</td>
                <td>86.9</td>
                <td>13.1</td>
              </tr>
              <tr valign="top">
                <td>Multiple sclerosis</td>
                <td>Female</td>
                <td>75.8</td>
                <td>24.2</td>
              </tr>
              <tr valign="top">
                <td>Rheumatoid arthritis</td>
                <td>Female</td>
                <td>74.1</td>
                <td>25.9</td>
              </tr>
              <tr valign="top">
                <td>Major depressive disorder</td>
                <td>Female</td>
                <td>63.3</td>
                <td>36.7</td>
              </tr>
              <tr valign="top">
                <td>Huntington disease</td>
                <td>Male</td>
                <td>50</td>
                <td>50</td>
              </tr>
              <tr valign="top">
                <td>Bacterial pneumonia</td>
                <td>Male</td>
                <td>49</td>
                <td>51</td>
              </tr>
              <tr valign="top">
                <td>Hypertension</td>
                <td>Male</td>
                <td>47.8</td>
                <td>52.2</td>
              </tr>
              <tr valign="top">
                <td>Amyotrophic lateral sclerosis</td>
                <td>Male</td>
                <td>46.2</td>
                <td>53.8</td>
              </tr>
              <tr valign="top">
                <td>COVID-19</td>
                <td>Male</td>
                <td>45.2</td>
                <td>54.8</td>
              </tr>
              <tr valign="top">
                <td>Multiple myeloma</td>
                <td>Male</td>
                <td>44.5</td>
                <td>55.5</td>
              </tr>
              <tr valign="top">
                <td>Colon cancer</td>
                <td>Male</td>
                <td>42.9</td>
                <td>57.1</td>
              </tr>
              <tr valign="top">
                <td>Tricuspid endocarditis</td>
                <td>Male</td>
                <td>42</td>
                <td>58</td>
              </tr>
              <tr valign="top">
                <td>Hepatitis B</td>
                <td>Male</td>
                <td>40.2</td>
                <td>59.8</td>
              </tr>
              <tr valign="top">
                <td>Tuberculosis</td>
                <td>Male</td>
                <td>38.8</td>
                <td>61.2</td>
              </tr>
              <tr valign="top">
                <td>Syphilis</td>
                <td>Male</td>
                <td>20.3</td>
                <td>79.7</td>
              </tr>
              <tr valign="top">
                <td>HIV</td>
                <td>Male</td>
                <td>18.9</td>
                <td>81.1</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Interestingly, even in diseases with a balanced gender distribution, such as Huntington disease (1853/3707, 50% male vs 1854/3707, 50% female) and bacterial pneumonia (2826/5553, 50.89% male vs 2727/5553, 49.11% female), the models still tended to favor the male group. This aligned with the gender polarization effect, where, despite a balanced real-world gender distribution, models disproportionately overrepresented one gender, typically the male group, as model size increased. This suggested that larger models may reinforce a bias-polarized gender preference, favoring the male group even when the actual prevalence of the disease is equally distributed between genders, and that this pattern might stem from societal gender norms influencing the inherent biases in LLMs [<xref ref-type="bibr" rid="ref42">42</xref>], potentially leading to an overrepresentation of male data.</p>
        <p>This observation suggested that the gender biases in the models were closely aligned with the real-world gender distributions of the diseases and further supported the notion that model size could exacerbate gender biases, leading to increasingly polarized representations of one gender or the other.</p>
      </sec>
      <sec>
        <title>Racial Bias</title>
        <p>Racial bias was also prevalent across all models, although its manifestation differed from gender bias. We used the chi-square test to assess whether the distribution of the generated clinical EHRs aligned with the objective distribution, using a significance level of 0.05. The detailed results, including <italic>P</italic> values, chi-square values, and <italic>df</italic>, are presented in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> (chi-square analysis results). Tables S1-S7 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> revealed that all 7 models exhibited varying degrees of racial bias across 20 diseases. We used SPD to measure the specific direction and extent of racial bias. The complete racial bias distribution statistics can be found in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> (racial bias distribution), with the key results summarized in <xref ref-type="table" rid="table8">Tables 8</xref>-<xref ref-type="table" rid="table10">10</xref>. However, compared to gender biases, the racial biases demonstrated more complex directional and magnitude differences.</p>
        <p>Unlike gender bias, where larger models often exhibited clear polarization toward one gender, racial bias did not show the same simple polarization phenomenon across all diseases as shown in <xref rid="figure2" ref-type="fig">Figures 2</xref>-<xref rid="figure4" ref-type="fig">4</xref>. Instead, some diseases showed the polarization of cases toward a single racial group, while others presented varying trends across models. For example, as shown in <xref ref-type="table" rid="table8">Table 8</xref>, in diseases like HIV, hypertension, and preeclampsia, the vast majority of models showed SPD values &gt;10% for the Black population. In diseases like bacterial pneumonia, colon cancer, rheumatoid arthritis, amyotrophic lateral sclerosis, and Huntington disease, White individuals were disproportionately represented, as shown in <xref ref-type="table" rid="table9">Table 9</xref>.</p>
        <p>One notable difference from gender bias was that fewer diseases were significantly influenced by the original racial distribution of the disease. Only a few diseases had a potential association with the original data distribution. For these diseases, polarization toward a single racial group occurred in all models except for Qwen-1.8B. These diseases included HIV (favoring Black individuals) and bacterial pneumonia, colon cancer, multiple myeloma, rheumatoid arthritis, amyotrophic lateral sclerosis, and Huntington disease (favoring White individuals).</p>
        <p>In addition, as shown in <xref ref-type="table" rid="table10">Table 10</xref>, minority racial groups, particularly Hispanic and Asian populations, were significantly underrepresented across most diseases. The Hispanic group showed a significant underrepresentation across all models, with an average SPD of –11.93% (SD 8.36%). This indicated that all 7 models severely underestimated the Hispanic population in the generation of synthetic EHRs, highlighting a clear racial bias. Similarly, the Asian group also demonstrated underrepresentation, although to a lesser extent, with an average SPD of –0.77% (SD 11.99%). This suggested a tendency for models to generate fewer cases for Asian populations compared to real-world data.</p>
        <p>In contrast, the representation of the Black and White groups was more complex and did not demonstrate a consistent racial bias across the models. For example, the Qwen-1.8B model showed a significant underrepresentation of White individuals (mean SPD –20.40%, SD 18.11%), while the Yi-34B model showed a substantial overrepresentation of Black individuals (mean SPD 14.90%, SD 27.16%). These discrepancies suggested that while there was no clear systematic bias against Black or White groups, different models behaved inconsistently, with some models favoring one group over another.</p>
        <p>Considering that models within the same series likely used similar training data, it could also be observed that the model size played an important role in shaping racial biases. For instance, the smaller Yi-6B model (mean SPD –17.00%, SD 7.86%) underrepresented Black individuals, while the larger Yi-34B model (mean SPD 14.90%, SD 27.16%) significantly overrepresented them, highlighting how model size can affect the direction of bias for this group.</p>
        <p>In conclusion, while racial bias did not manifest in the same polarized manner as gender bias, the extent and impact of racial bias in health care AI models were still concerning. Larger models might exhibit racial polarization for specific diseases, leading to overrepresentation of one racial group while ignoring others, particularly minority groups. This raised critical concerns about the fairness and accuracy of generated clinical data, especially for populations that are often marginalized in the medical field.</p>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Proportion and statistical parity difference (SPD) values for the Black group across 7 models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="120"/>
            <col width="130"/>
            <col width="120"/>
            <col width="130"/>
            <col width="130"/>
            <col width="150"/>
            <col width="90"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Diseases</td>
                <td>Qwen-1.8B</td>
                <td>Qwen-7B</td>
                <td>Qwen-14B</td>
                <td>Llama2-7B</td>
                <td>Llama2-13B</td>
                <td>Yi-6B</td>
                <td>Yi-34B</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="9">
                  <bold>Amyotrophic lateral sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>9.2</td>
                <td>2.5</td>
                <td>6.5</td>
                <td>1.2</td>
                <td>0.1</td>
                <td>0</td>
                <td>0.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+2.7</td>
                <td>–4.0</td>
                <td>0</td>
                <td>–5.3</td>
                <td>–6.4</td>
                <td>–6.5</td>
                <td>–5.9</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Bacterial pneumonia</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>15.3</td>
                <td>4.4</td>
                <td>8.3</td>
                <td>6.2</td>
                <td>2.0</td>
                <td>0.4</td>
                <td>19.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–9.7</td>
                <td>–20.6<sup>a</sup></td>
                <td>–16.7<sup>a</sup></td>
                <td>–18.8<sup>a</sup></td>
                <td>–23.0<sup>a</sup></td>
                <td>–24.6<sup>a</sup></td>
                <td>–5.7</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Colon cancer</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>25.6</td>
                <td>6.7</td>
                <td>5.5</td>
                <td>2.6</td>
                <td>1.5</td>
                <td>0.4</td>
                <td>18.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+10.0<sup>a</sup></td>
                <td>–8.9</td>
                <td>–10.1<sup>a</sup></td>
                <td>–13.0<sup>a</sup></td>
                <td>–14.1<sup>a</sup></td>
                <td>–15.2<sup>a</sup></td>
                <td>+2.8</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>COVID-19</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>14.8</td>
                <td>7.0</td>
                <td>4.2</td>
                <td>6.9</td>
                <td>3.6</td>
                <td>1.0</td>
                <td>13.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+1.1</td>
                <td>–6.7</td>
                <td>–9.5</td>
                <td>–6.8</td>
                <td>–10.1<sup>a</sup></td>
                <td>–12.7<sup>a</sup></td>
                <td>–0.5</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Hepatitis B</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>16.9</td>
                <td>13.7</td>
                <td>0</td>
                <td>12.2</td>
                <td>15.5</td>
                <td>1.1</td>
                <td>12.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–13.5<sup>a</sup></td>
                <td>–16.7<sup>a</sup></td>
                <td>–30.4<sup>a</sup></td>
                <td>–18.2<sup>a</sup></td>
                <td>–14.9<sup>a</sup></td>
                <td>–29.3<sup>a</sup></td>
                <td>–18.2<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>HIV</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>51.8</td>
                <td>59.8</td>
                <td>88.9</td>
                <td>68.5</td>
                <td>67.6</td>
                <td>14.8</td>
                <td>95.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+10.1<sup>a</sup></td>
                <td>+18.1<sup>a</sup></td>
                <td>+47.2<sup>a</sup></td>
                <td>+26.8<sup>a</sup></td>
                <td>+25.9<sup>a</sup></td>
                <td>–26.9<sup>a</sup></td>
                <td>+53.9<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Huntington disease</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>16.8</td>
                <td>1.2</td>
                <td>1.2</td>
                <td>4.8</td>
                <td>0.6</td>
                <td>0.2</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+2.8</td>
                <td>–12.8<sup>a</sup></td>
                <td>–12.8<sup>a</sup></td>
                <td>–9.2</td>
                <td>–13.4<sup>a</sup></td>
                <td>–13.8<sup>a</sup></td>
                <td>–14.0<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Hypertension</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>27.8</td>
                <td>24.4</td>
                <td>49.7</td>
                <td>53.7</td>
                <td>71.8</td>
                <td>1.1</td>
                <td>77.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+13.1<sup>a</sup></td>
                <td>+9.7</td>
                <td>+35.0<sup>a</sup></td>
                <td>+39.0<sup>a</sup></td>
                <td>+57.1<sup>a</sup></td>
                <td>–13.6<sup>a</sup></td>
                <td>+62.5<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Lupus</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>30.6</td>
                <td>17.0</td>
                <td>28.8</td>
                <td>75.4</td>
                <td>72.6</td>
                <td>0.9</td>
                <td>57.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+2.2</td>
                <td>–11.4<sup>a</sup></td>
                <td>+0.4</td>
                <td>+47.0<sup>a</sup></td>
                <td>+44.2<sup>a</sup></td>
                <td>–27.5<sup>a</sup></td>
                <td>+28.7<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Major depressive disorder</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>6.8</td>
                <td>10.6</td>
                <td>6.5</td>
                <td>7.3</td>
                <td>0.1</td>
                <td>0.3</td>
                <td>4.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–3.7</td>
                <td>+0.1</td>
                <td>–4.0</td>
                <td>–3.2</td>
                <td>–10.4<sup>a</sup></td>
                <td>–10.2<sup>a</sup></td>
                <td>–6.1</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Multiple myeloma</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>17.8</td>
                <td>6.5</td>
                <td>13.8</td>
                <td>19.7</td>
                <td>2.3</td>
                <td>0.7</td>
                <td>6.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–3.4</td>
                <td>–14.7<sup>a</sup></td>
                <td>–7.4</td>
                <td>–1.5</td>
                <td>–18.9<sup>a</sup></td>
                <td>–20.5<sup>a</sup></td>
                <td>–14.8<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Multiple sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>19.3</td>
                <td>6.6</td>
                <td>10.2</td>
                <td>7.5</td>
                <td>0.2</td>
                <td>0.3</td>
                <td>0.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+8.5</td>
                <td>–4.2</td>
                <td>–0.6</td>
                <td>–3.3</td>
                <td>–10.6<sup>a</sup></td>
                <td>–10.5<sup>a</sup></td>
                <td>–10.3<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Preeclampsia</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>26.7</td>
                <td>43.2</td>
                <td>53.3</td>
                <td>83.7</td>
                <td>54.6</td>
                <td>7.2</td>
                <td>69.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+8.7</td>
                <td>+25.2<sup>a</sup></td>
                <td>+35.3<sup>a</sup></td>
                <td>+65.7<sup>a</sup></td>
                <td>+36.6<sup>a</sup></td>
                <td>–10.8<sup>a</sup></td>
                <td>+51.1<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Prostate cancer</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>20.4</td>
                <td>18.4</td>
                <td>17.5</td>
                <td>15.6</td>
                <td>26.7</td>
                <td>0.4</td>
                <td>29.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+5.8</td>
                <td>+3.8</td>
                <td>+2.9</td>
                <td>+1.0</td>
                <td>+12.1<sup>a</sup></td>
                <td>–14.2<sup>a</sup></td>
                <td>+15.2<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Rheumatoid arthritis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>12.1</td>
                <td>5.1</td>
                <td>6.7</td>
                <td>5.1</td>
                <td>0.2</td>
                <td>0.2</td>
                <td>14.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+0.8</td>
                <td>–6.2</td>
                <td>–4.6</td>
                <td>–6.2</td>
                <td>–11.1<sup>a</sup></td>
                <td>–11.1<sup>a</sup></td>
                <td>+3.2</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Sarcoidosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>13.0</td>
                <td>34.7</td>
                <td>92.6</td>
                <td>88.2</td>
                <td>53.9</td>
                <td>11.9</td>
                <td>88.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–20.2<sup>a</sup></td>
                <td>+1.5</td>
                <td>+59.4<sup>a</sup></td>
                <td>+55.0<sup>a</sup></td>
                <td>+20.7<sup>a</sup></td>
                <td>–21.3<sup>a</sup></td>
                <td>+55.0<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Syphilis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>35.1</td>
                <td>20.8</td>
                <td>6.1</td>
                <td>28</td>
                <td>38.0</td>
                <td>2.2</td>
                <td>77.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–2.9</td>
                <td>–17.2<sup>a</sup></td>
                <td>–31.9<sup>a</sup></td>
                <td>–10</td>
                <td>0</td>
                <td>–35.8<sup>a</sup></td>
                <td>+39.6<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Takotsubo cardiomyopathy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>2.3</td>
                <td>1.3</td>
                <td>0</td>
                <td>4.8</td>
                <td>0.7</td>
                <td>0.3</td>
                <td>1.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–5.0</td>
                <td>–6.0</td>
                <td>–7.3</td>
                <td>–2.5</td>
                <td>–6.6</td>
                <td>–7.0</td>
                <td>–5.5</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Tricuspid endocarditis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>14.1</td>
                <td>9.1</td>
                <td>10</td>
                <td>4.2</td>
                <td>2.6</td>
                <td>1.3</td>
                <td>28.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–1.6</td>
                <td>–6.6</td>
                <td>–5.7</td>
                <td>–11.5<sup>a</sup></td>
                <td>–13.1<sup>a</sup></td>
                <td>–14.4<sup>a</sup></td>
                <td>+13.0<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Tuberculosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>25.1</td>
                <td>23.2</td>
                <td>35</td>
                <td>33.9</td>
                <td>35.3</td>
                <td>4.3</td>
                <td>73.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+5.7</td>
                <td>+3.8</td>
                <td>+15.6<sup>a</sup></td>
                <td>+14.5<sup>a</sup></td>
                <td>+15.9<sup>a</sup></td>
                <td>–15.1<sup>a</sup></td>
                <td>+54.4<sup>a</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table8fn1">
              <p><sup>a</sup>When |SPD| &gt;10%, it indicates a significant deviation from real-world prevalence: values &gt;+10% denote overrepresentation, and values &lt;–10% denote underrepresentation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Proportion and statistical parity difference (SPD) values for the White group across 7 models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="270"/>
            <col width="100"/>
            <col width="0"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Diseases</td>
                <td colspan="2">Qwen-1.8B</td>
                <td>Qwen-7B</td>
                <td>Qwen-14B</td>
                <td>Llama2-7B</td>
                <td>Llama2-13B</td>
                <td>Yi-6B</td>
                <td>Yi-34B</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="10">
                  <bold>Amyotrophic lateral sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>46.8</td>
                <td colspan="2">87.5</td>
                <td>87.4</td>
                <td>95</td>
                <td>89.4</td>
                <td>77.6</td>
                <td>92.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–19.9<sup>a</sup></td>
                <td colspan="2">+20.8<sup>a</sup></td>
                <td>+20.7<sup>a</sup></td>
                <td>+28.3<sup>a</sup></td>
                <td>+22.7<sup>a</sup></td>
                <td>+10.9<sup>a</sup></td>
                <td>+25.6<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Bacterial pneumonia</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>42.5</td>
                <td colspan="2">84.4</td>
                <td>70.9</td>
                <td>87.7</td>
                <td>97.4</td>
                <td>76.4</td>
                <td>77</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–18.5<sup>a</sup></td>
                <td colspan="2">+23.4<sup>a</sup></td>
                <td>+9.9</td>
                <td>+26.7<sup>a</sup></td>
                <td>+36.4<sup>a</sup></td>
                <td>+15.4<sup>a</sup></td>
                <td>+16.0<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Colon cancer</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>39.5</td>
                <td colspan="2">83.9</td>
                <td>83.8</td>
                <td>88.7</td>
                <td>90.9</td>
                <td>78.1</td>
                <td>78.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–23.1<sup>a</sup></td>
                <td colspan="2">+21.3<sup>a</sup></td>
                <td>+21.2<sup>a</sup></td>
                <td>+26.1<sup>a</sup></td>
                <td>+28.3<sup>a</sup></td>
                <td>+15.5<sup>a</sup></td>
                <td>+16.1<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>COVID-19</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>40.3</td>
                <td colspan="2">74.4</td>
                <td>61.3</td>
                <td>21.1</td>
                <td>81.6</td>
                <td>62.4</td>
                <td>57.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–26.7<sup>a</sup></td>
                <td colspan="2">+7.4</td>
                <td>–5.7</td>
                <td>–45.9<sup>a</sup></td>
                <td>+14.6<sup>a</sup></td>
                <td>–4.6</td>
                <td>–10.0<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Hepatitis B</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>30.8</td>
                <td colspan="2">38.9</td>
                <td>11.2</td>
                <td>1.9</td>
                <td>5.1</td>
                <td>45.1</td>
                <td>18.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>+2.6</td>
                <td colspan="2">+10.7<sup>a</sup></td>
                <td>–17.0<sup>a</sup></td>
                <td>–26.3<sup>a</sup></td>
                <td>–23.1<sup>a</sup></td>
                <td>+16.9<sup>a</sup></td>
                <td>–9.7</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>HIV</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>20.8</td>
                <td colspan="2">33.4</td>
                <td>6.3</td>
                <td>4.6</td>
                <td>8.4</td>
                <td>64.4</td>
                <td>2.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–3.7</td>
                <td colspan="2">+8.9</td>
                <td>–18.2<sup>a</sup></td>
                <td>–19.9<sup>a</sup></td>
                <td>–16.1<sup>a</sup></td>
                <td>+39.9<sup>a</sup></td>
                <td>–21.9<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Huntington disease</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>31.8</td>
                <td colspan="2">93.9</td>
                <td>95.1</td>
                <td>88.5</td>
                <td>86.5</td>
                <td>82.6</td>
                <td>96.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–38.3<sup>a</sup></td>
                <td colspan="2">+23.8<sup>a</sup></td>
                <td>+25.0<sup>a</sup></td>
                <td>+18.4<sup>a</sup></td>
                <td>+16.4<sup>a</sup></td>
                <td>+12.5<sup>a</sup></td>
                <td>+26.2<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Hypertension</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>35.1</td>
                <td colspan="2">70.1</td>
                <td>42.5</td>
                <td>41.8</td>
                <td>27.2</td>
                <td>78.6</td>
                <td>21.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–18.5<sup>a</sup></td>
                <td colspan="2">+16.5<sup>a</sup></td>
                <td>–11.1<sup>a</sup></td>
                <td>–11.8<sup>a</sup></td>
                <td>–26.4<sup>a</sup></td>
                <td>+25.0<sup>a</sup></td>
                <td>–32.1<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Lupus</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>32.5</td>
                <td colspan="2">70.5</td>
                <td>53.4</td>
                <td>8.3</td>
                <td>16.2</td>
                <td>73.7</td>
                <td>31.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–15.6<sup>a</sup></td>
                <td colspan="2">+22.4<sup>a</sup></td>
                <td>+5.3</td>
                <td>–39.8<sup>a</sup></td>
                <td>–31.9<sup>a</sup></td>
                <td>+25.6<sup>a</sup></td>
                <td>–17.0<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Major depressive disorder</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>37.9</td>
                <td colspan="2">78.8</td>
                <td>88.7</td>
                <td>50.0</td>
                <td>88.3</td>
                <td>82.9</td>
                <td>90.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–28.8<sup>a</sup></td>
                <td colspan="2">+12.1<sup>a</sup></td>
                <td>+22.0<sup>a</sup></td>
                <td>–16.7<sup>a</sup></td>
                <td>+21.6<sup>a</sup></td>
                <td>+16.2<sup>a</sup></td>
                <td>+23.5<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Multiple myeloma</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>44.3</td>
                <td colspan="2">85.9</td>
                <td>69.5</td>
                <td>71.7</td>
                <td>94.9</td>
                <td>77.2</td>
                <td>89.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–21.2<sup>a</sup></td>
                <td colspan="2">+20.4<sup>a</sup></td>
                <td>+4.0</td>
                <td>+6.2</td>
                <td>+29.4<sup>a</sup></td>
                <td>+11.7<sup>a</sup></td>
                <td>+23.9<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Multiple sclerosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>49.9</td>
                <td colspan="2">83.3</td>
                <td>84.1</td>
                <td>80.0</td>
                <td>90.3</td>
                <td>77.1</td>
                <td>94.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–27.7<sup>a</sup></td>
                <td colspan="2">+5.7</td>
                <td>+6.5</td>
                <td>+2.4</td>
                <td>+12.7<sup>a</sup></td>
                <td>–0.5</td>
                <td>+17.2<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Preeclampsia</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>30.8</td>
                <td colspan="2">49.8</td>
                <td>31.7</td>
                <td>8.0</td>
                <td>34.5</td>
                <td>64.9</td>
                <td>26.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–22.4<sup>a</sup></td>
                <td colspan="2">–3.4</td>
                <td>–21.5<sup>a</sup></td>
                <td>–45.2<sup>a</sup></td>
                <td>–18.7<sup>a</sup></td>
                <td>+11.7<sup>a</sup></td>
                <td>–26.6<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Prostate cancer</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>42.4</td>
                <td colspan="2">76.0</td>
                <td>76.5</td>
                <td>77.9</td>
                <td>66.5</td>
                <td>79.1</td>
                <td>67.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–32.0<sup>a</sup></td>
                <td colspan="2">+1.6</td>
                <td>+2.1</td>
                <td>+3.5</td>
                <td>–7.9</td>
                <td>+4.7</td>
                <td>–7.4</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Rheumatoid arthritis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td>43.8</td>
                <td colspan="2">81.4</td>
                <td>81.6</td>
                <td>86.0</td>
                <td>98.8</td>
                <td>76.0</td>
                <td>76.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td>–9.9</td>
                <td colspan="2">+27.7<sup>a</sup></td>
                <td>+27.9<sup>a</sup></td>
                <td>+32.3<sup>a</sup></td>
                <td>+45.1<sup>a</sup></td>
                <td>+22.3<sup>a</sup></td>
                <td>+22.6<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Sarcoidosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td colspan="2">50.5</td>
                <td>61.7</td>
                <td>6.1</td>
                <td>5.6</td>
                <td>44.6</td>
                <td>68.4</td>
                <td>10.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td colspan="2">–7.4</td>
                <td>+3.8</td>
                <td>–51.8<sup>a</sup></td>
                <td>–52.3<sup>a</sup></td>
                <td>–13.3<sup>a</sup></td>
                <td>+10.5<sup>a</sup></td>
                <td>–47.3<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Syphilis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td colspan="2">34.3</td>
                <td>68.3</td>
                <td>54.8</td>
                <td>43.5</td>
                <td>43.6</td>
                <td>72.3</td>
                <td>20.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td colspan="2">–4.4</td>
                <td>+29.6<sup>a</sup></td>
                <td>+16.1<sup>a</sup></td>
                <td>+4.8</td>
                <td>+4.9</td>
                <td>+33.6<sup>a</sup></td>
                <td>–18.3<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Takotsubo cardiomyopathy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td colspan="2">10.3</td>
                <td>75.4</td>
                <td>70.1</td>
                <td>56.3</td>
                <td>98.4</td>
                <td>61.6</td>
                <td>90.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td colspan="2">–71.2<sup>a</sup></td>
                <td>–6.1</td>
                <td>–11.4<sup>a</sup></td>
                <td>–25.2<sup>a</sup></td>
                <td>+16.9<sup>a</sup></td>
                <td>–19.9<sup>a</sup></td>
                <td>+9.2</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Tricuspid endocarditis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td colspan="2">41.1</td>
                <td>74.8</td>
                <td>60.4</td>
                <td>83.6</td>
                <td>95.5</td>
                <td>70.8</td>
                <td>68.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td colspan="2">–40.2<sup>a</sup></td>
                <td>–6.5</td>
                <td>–20.9<sup>a</sup></td>
                <td>+2.3</td>
                <td>+14.2<sup>a</sup></td>
                <td>–10.5<sup>a</sup></td>
                <td>–12.8<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Tuberculosis</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio (%)</td>
                <td colspan="2">30.8</td>
                <td>22.1</td>
                <td>33.4</td>
                <td>4.9</td>
                <td>10.7</td>
                <td>63.4</td>
                <td>15.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SPD (%)</td>
                <td colspan="2">+19.5<sup>a</sup></td>
                <td>+10.8<sup>a</sup></td>
                <td>+22.1<sup>a</sup></td>
                <td>–6.4</td>
                <td>–0.6</td>
                <td>+52.1<sup>a</sup></td>
                <td>+3.8</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table9fn1">
              <p><sup>a</sup>When |SPD| &gt;10%, it indicates a significant deviation from real-world prevalence: values &gt;+10% denote overrepresentation, and values &lt;–10% denote underrepresentation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table10">
          <label>Table 10</label>
          <caption>
            <p>Statistical parity difference (SPD) mean values across 7 models for racial groups in 20 diseases and the average SPD for each racial group across 7 models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="140"/>
            <col width="150"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Models</td>
                <td>Black group (%)</td>
                <td>White group (%)</td>
                <td>Hispanic group (%)</td>
                <td>Asian group (%)</td>
                <td>Not available (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Qwen-1.8B</td>
                <td>0.6</td>
                <td>–20.4<sup>a</sup></td>
                <td>–9.5<sup>b</sup></td>
                <td>–3.1</td>
                <td>31.9</td>
              </tr>
              <tr valign="top">
                <td>Qwen-7B</td>
                <td>–3.7</td>
                <td>12.5<sup>a</sup></td>
                <td>–13.3<sup>b</sup></td>
                <td>–1.8</td>
                <td>5.9</td>
              </tr>
              <tr valign="top">
                <td>Qwen-14B</td>
                <td>2.7</td>
                <td>1.3</td>
                <td>–12.1<sup>b</sup></td>
                <td>–0.4</td>
                <td>8.1</td>
              </tr>
              <tr valign="top">
                <td>Llama2-7B</td>
                <td>7</td>
                <td>–6.9</td>
                <td>–11.2<sup>b</sup></td>
                <td>6.7</td>
                <td>4.1</td>
              </tr>
              <tr valign="top">
                <td>Llama2-13B</td>
                <td>3</td>
                <td>6.3</td>
                <td>–11.3<sup>b</sup></td>
                <td>–1.8</td>
                <td>3.5</td>
              </tr>
              <tr valign="top">
                <td>Yi-6B</td>
                <td>–17<sup>a</sup></td>
                <td>14.4<sup>a</sup></td>
                <td>–13.1<sup>b</sup></td>
                <td>–3.5</td>
                <td>18.8</td>
              </tr>
              <tr valign="top">
                <td>Yi-34B</td>
                <td>14.9<sup>a</sup></td>
                <td>–1</td>
                <td>–13<sup>b</sup></td>
                <td>–1.5</td>
                <td>0.1</td>
              </tr>
              <tr valign="top">
                <td>Average SPD across 7 models<sup>c</sup></td>
                <td>1.07</td>
                <td>0.89</td>
                <td>–11.93<sup>b</sup></td>
                <td>–0.77</td>
                <td>10.34</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table10fn1">
              <p><sup>a</sup>Column shows that different models behaved inconsistently.</p>
            </fn>
            <fn id="table10fn2">
              <p><sup>b</sup>Column shows a significant underrepresentation across all models.</p>
            </fn>
            <fn id="table10fn3">
              <p><sup>c</sup>Row shows the average SPD for each racial group across 7 models.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Racial bias distribution heat map in the Qwen model across parameter sizes of 1.8 billion, 7 billion, and 14 billion. The data represented by the red frame is the actual racial distribution in the United States for the year 2020.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e65317_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Racial bias distribution heat map in the Llama2 model across parameter sizes of 7 billion and 13 billion. The data represented by the red frame is the actual racial distribution in the United States for the year 2020.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e65317_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Racial bias distribution heat map in the Yi model across parameter sizes of 6 billion and 34 billion. The data represented by the red frame is the actual racial distribution in the United States for the year 2020.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e65317_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <sec>
          <title>Overview</title>
          <p>In this study, we evaluated multiple LLMs of varying sizes on their ability to generate synthetic EHRs. Two primary metrics (EPS and SPD) were introduced to quantify how well each model captured clinical detail (via EPS) and how its outputs diverged across demographic groups (via SPD). Larger models, such as Yi-34B, Qwen-14B, and Llama2-13B, consistently attained higher EPS values, indicating that increased parameter capacity can lead to more comprehensive EHR generation. However, alongside these performance gains, we observed a pronounced escalation in gender and racial bias.</p>
          <p>Four major findings emerged from our analysis as described in subsequent sections.</p>
        </sec>
        <sec>
          <title>Performance-Bias Trade-Off</title>
          <p>As model size grew, specialized EHR generation performance improved, but gender and racial biases also became more pronounced. This underscores a tension between maximizing raw generative power and ensuring equitable representation.</p>
        </sec>
        <sec>
          <title>Widespread Gender and Racial Bias</title>
          <p>Biases were pervasive across all tested models, regardless of parameter size. Even smaller models displayed notable skew in their generated records, suggesting that bias issues are not exclusively tied to high-parameter architectures.</p>
        </sec>
        <sec>
          <title>Strong Association With Original Disease Distribution</title>
          <p>Gender bias showed a clear correlation with real-world prevalence patterns for the 18 tested diseases, whereas racial bias significantly aligned with real data only for a subset of those diseases. This suggests that some biases are closely tied to the original clinical patterns in the training data, while others may arise from broader generative tendencies.</p>
        </sec>
        <sec>
          <title>Heterogeneous Racial Bias</title>
          <p>Although gender bias primarily manifested as the overrepresentation of one gender, racial bias appeared in more nuanced ways. Certain diseases were skewed toward Black or White populations, while Hispanic and Asian groups were frequently underrepresented. These varied patterns, captured by our SPD metric, highlight the complexity of identifying and mitigating racial bias within synthetic EHRs.</p>
          <p>Overall, our findings emphasize the need to address demographic biases in tandem with improving model capabilities. While larger LLMs generally produce more detailed and plausible synthetic EHRs, unaddressed biases can perpetuate or even amplify disparities in health care datasets and decision-making processes.</p>
        </sec>
      </sec>
      <sec>
        <title>Comparison to Prior Work</title>
        <p>Existing studies have highlighted the potential for systematic group biases in LLMs tasked with generating EHRs. For example, Zack et al [<xref ref-type="bibr" rid="ref19">19</xref>] demonstrated that even after excluding diseases highly correlated with specific genders, such as prostate cancer and preeclampsia, GPT-4’s case distributions still diverged substantially from real-world prevalence estimates. To further validate the generalizability of this risk, our work expands the scope to open-source models. Despite using a maximum parameter size of only 34 billion for the largest open-source model, our findings show a substantial positive correlation between model scale and bias intensity. This not only supports the hypothesis that “increasing model parameters may amplify generation bias” but also confirms the cross-platform consistency of this phenomenon across different model architectures.</p>
        <p>Regarding bias quantification, this study takes an innovative step by introducing the SPD metric as a unified, multidimensional evaluation framework, enabling fine-grained assessments of both gender and racial biases. In contrast to previous research that often relies on a single-dimension or task-specific measure, such as simple chi-square tests for gender and race distribution [<xref ref-type="bibr" rid="ref19">19</xref>], the unified scale of SPD permits direct comparisons of how different sensitive attributes vary in both bias magnitude and clinical impact.</p>
        <p>Focusing on the gender dimension, our disease-specific analysis further refines existing insights. Aligned with the “epidemiological constraint” hypothesis proposed by Zack et al [<xref ref-type="bibr" rid="ref19">19</xref>], our results reveal a clear positive correlation between generation bias and the baseline gender distribution of each disease. This finding underscores the need for distributional calibration when creating disease-specific synthetic datasets to avert the somewhat polarized outcomes observed here—for instance, our study recorded a 99.3% female majority in the rheumatoid arthritis cases generated by Llama2-13B.</p>
        <p>Regarding the racial dimension, the cross-disease SPD measurements further validate the metric’s utility. Calculating average SPD scores across models indicates a marked negative bias for Hispanic populations in most models (SPD&lt;–10%), and substantial underrepresentation of Asian populations (mean SPD&lt;0%), consistent with the single-model (GPT-4) findings of Zack et al [<xref ref-type="bibr" rid="ref19">19</xref>]. Black and White populations also showed significant variations in average SPD across models, suggesting that SPD is sufficiently sensitive for detecting multiethnic biases. By using a multimodel comparative approach, our study offers a more comprehensive and generalizable perspective on the mechanisms through which these biases emerge, compared with single-model analyses from prior research.</p>
        <p>Beyond large-scale generative models, conventional EHR performance metrics have been explored in medical informatics. Classic measures—such as accuracy, <italic>F</italic><sub>1</sub>-score, and domain-specific statistics (eg, positive predictive value, recall, and specificity)—primarily target classification or detection tasks within structured clinical datasets. Although these metrics are still relevant for evaluating core EHR functions (eg, diagnosis codes and medication lists), they may fall short of capturing the multidimensional nature of synthetic EHRs generated by LLMs, where hallucinated attributes, incomplete records, or biases in demographic fields can heavily skew real-world applicability.</p>
        <p>In addition, general-purpose LLM benchmarks like MMLU and C-Eval offer insights into a model’s language understanding and reasoning capabilities but do not adequately address domain-specific challenges inherent to EHR generation. These high-level evaluations lack field-level completeness checks and do not assess the realism and clinical plausibility of the records, which are crucial factors in health care settings. Therefore, relying solely on these benchmarks may obscure significant issues, such as synthetic data hallucinations, logical inconsistencies, or demographic imbalances that are particularly concerning in medical domains.</p>
        <p>To address these limitations, this study introduces EPS and EPS<italic><sub>i</sub></italic> as novel dual-metric evaluation frameworks for assessing LLMs in synthetic EHR generation. EPS evaluates the systematic quality and completeness of generated records, while EPS<italic><sub>i</sub></italic> provides a field-specific granular analysis to quantify the model’s precision in generating individual attributes. The synergistic action of these dual metrics enables panoramic performance evaluation of LLMs while revealing capability divergence patterns across models and attributes through comparative analysis.</p>
        <p>Recent studies have showcased Llama2’s potential in diverse health care contexts—ranging from disease detection and clinical information extraction to predictive analysis and phenotyping [<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref46">46</xref>]. For instance, Llama2-based models have demonstrated competitive performance in metastasis detection for breast cancer, clinical text mining for epilepsy data, and discharge prediction through EHR audit logs. Research also highlights the model’s utility in health care natural language processing tasks (eg, temporal relations extraction for chemotherapy tracking and oncological data standardization), illustrating broad applicability and promising results.</p>
        <p>However, Llama2 faces similar challenges observed in other LLMs—namely, data privacy concerns, interpretability issues, and the potential to amplify existing biases if not rigorously monitored. Our study’s findings regarding the performance-bias trade-off resonate with these broader concerns: while larger Llama2 variants might yield more detailed and accurate synthetic EHRs, they could also introduce greater disparities in gender and racial representation.</p>
        <p>Given that health informatics fundamentally relies on reliable, equitable data for clinical decision-making, Llama2’s success in predictive accuracy and data extraction underscores its value. However, these same tasks demand a careful examination of how biases might propagate through patient records, potentially affecting real-world health outcomes. Building on other reports where Llama2 was compared with GPT-4 and specialized Bidirectional Encoder Representations from Transformers–based models, our study suggests that model size and training data composition may significantly influence both performance and demographic skew. As Llama2 continues to evolve (eg, newer architectures such as “Llama3” or lightweight variants like Mistral), striking the right balance between performance gains and bias mitigation remains a pivotal challenge for integrative clinical applications.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>This study offers a comprehensive exploration of bias in synthetic EHR generation by evaluating multiple models across various diseases. We introduce the SPD metric to quantify both gender and racial biases, providing a health care–focused measure that goes beyond traditional benchmarks. In addition, by applying the EPS metric to gauge overall EHR realism, we address gaps left by general-purpose evaluations (eg, MMLU and C-Eval) that lack fine-grained insights into the clinical plausibility of generated data. Our detailed bias analysis—covering gender polarization and racial underrepresentation—helps quantify the specific ways in which large models may skew medical data, laying a foundation for future improvements in both model design and training protocols.</p>
        <p>Despite these contributions, our work has certain constraints. First, limited transparency regarding training data makes it difficult to pinpoint the root causes of observed biases, a challenge that applies broadly to commercial and even some open-source LLMs. Second, we conducted a single-round evaluation; thus, iterative fine-tuning or other debiasing techniques were not explored. Third, no formal bias mitigation strategies were applied, leaving open questions about how targeted interventions (eg, data augmentation, adversarial training, or post hoc rebalancing) might improve model fairness. Finally, while our generated EHRs were aligned with known prevalence data, we did not incorporate detailed clinician review to fully assess the clinical validity and completeness of these synthetic records, underscoring the need for multidisciplinary evaluations in future work.</p>
      </sec>
      <sec>
        <title>Future Directions</title>
        <p>Future studies should prioritize enhancing the transparency of training data sources and documentation practices for both commercial and open-source LLMs, enabling systematic audits to identify and mitigate biases. Multi-round, iterative testing protocols that incorporate dynamic fine-tuning and debiasing techniques (eg, data augmentation, adversarial training, or fairness-aware optimization frameworks) could further elucidate pathways to reducing model biases. In addition, rigorous evaluations of formal bias mitigation strategies, including clinician-guided audits of synthetic EHRs for clinical validity, completeness, and alignment with real-world practice patterns, are essential. Collaborative, multidisciplinary efforts involving ethicists, clinicians, and model developers will be critical to advancing equitable AI applications in health care while balancing technical innovation with domain-specific rigor.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Our study demonstrates that as LLMs grow larger, they are capable of producing more detailed and realistic synthetic EHRs, yet this enhanced capacity comes at the cost of increasing demographic biases. We observed a notable gender polarization effect and a heterogeneous pattern of racial biases, underscoring the complexity of fair EHR generation. Although these findings affirm concerns raised in earlier research—particularly the performance-bias trade-off—they also highlight actionable opportunities for improving both model accuracy and equity in future health care applications.</p>
        <p>By introducing EPS and SPD metrics, we provide a domain-specific framework for systematically evaluating the dual challenges of performance and bias. Addressing these challenges will require iterative bias mitigation strategies, multidisciplinary collaborations with clinical experts, and the continued development of tailored evaluation benchmarks. Ultimately, balancing the promise of advanced synthetic EHR generation against the ethical and practical imperatives of unbiased health care data stands as a key frontier in medical informatics.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Prompts and disease.</p>
        <media xlink:href="jmir_v27i1e65317_app1.docx" xlink:title="DOCX File , 32 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Chi-square analysis results.</p>
        <media xlink:href="jmir_v27i1e65317_app2.docx" xlink:title="DOCX File , 65 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Racial bias distribution.</p>
        <media xlink:href="jmir_v27i1e65317_app3.docx" xlink:title="DOCX File , 72 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">C-Eval</term>
          <def>
            <p>comprehensive Chinese evaluation suite for foundation models</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EPS</term>
          <def>
            <p>electronic health record performance score</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MMLU</term>
          <def>
            <p>Massive Multitask Language Understanding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SPD</term>
          <def>
            <p>statistical parity difference</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The computational resources generously provided by the High Performance Computing Center of Nanjing Medical University are greatly appreciated.</p>
      <p>This work was supported by the National Key Research and Development Program for the project “Integrated Regional Application Demonstration of Digitalization Technologies for Proactive Health Services” (grant 2023YFC3605800); the International Collaboration Project “Intelligent Management Software for Multimodal Medical Data Aimed at Next-Generation Information Technologies” (grant 2022); the Nanjing Municipal Science and Technology Bureau project “Cooperative Research and Transformation for a Proactive Intelligent Health Management Platform for Diabetes” (grant 202205053); the Provincial Department of Science and Technology’s Social Development Plan project “Construction and Application of an Early Diagnosis and Prediction Model for Gestational Diabetes Based on Multimodal Information” (grant BE2023781); the Nanjing Postdoctoral Research Funding Program project “Mechanistic Study of Intestinal TLR4 Signaling Pathway in Metabolic Regulation in Elderly Diabetes Patients” (grant BSHNJ202211 [JC22]); the National Natural Science Foundation of China “Research on Key Technologies of Anti-Interference in Large-Scale Wireless Communication Networks” (grant 62401641); and the China Postdoctoral Science Foundation “Research on intelligent wireless access for ultrareliable and low-latency communication” (grant 2023M742672).</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>All data generated or analyzed during this study are included in this published article and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>RH wrote the main manuscript. YL and CW reviewed the entire manuscript and served as the corresponding authors. HQ, YY, and YX conducted the experiments, collected the data, and prepared the figures. CZ, HW, and XW provided suggestions for the experimental methods. SL, XZ, and JK contributed to the interpretation of the results. All authors reviewed and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>OpenAI</collab>
          </person-group>
          <article-title>GPT-4 technical report</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on March 15, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2303.08774"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SH</given-names>
            </name>
          </person-group>
          <article-title>Natural language generation for electronic health records</article-title>
          <source>NPJ Digit Med</source>
          <year>2018</year>
          <month>11</month>
          <day>19</day>
          <volume>1</volume>
          <fpage>63</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-018-0070-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-018-0070-0</pub-id>
          <pub-id pub-id-type="medline">30687797</pub-id>
          <pub-id pub-id-type="pmcid">PMC6345174</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arvanitis</surname>
              <given-names>TN</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chaplin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Despotou</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A method for machine learning generation of realistic synthetic datasets for validating healthcare applications</article-title>
          <source>Health Informatics J</source>
          <year>2022</year>
          <volume>28</volume>
          <issue>2</issue>
          <fpage>14604582221077000</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/14604582221077000?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/14604582221077000</pub-id>
          <pub-id pub-id-type="medline">35414269</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ive</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Viani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Verma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Puntis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cardinal</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Generation and evaluation of artificial mental health records for natural language processing</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <month>05</month>
          <day>14</day>
          <volume>3</volume>
          <fpage>69</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-0267-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0267-x</pub-id>
          <pub-id pub-id-type="medline">32435697</pub-id>
          <pub-id pub-id-type="pii">267</pub-id>
          <pub-id pub-id-type="pmcid">PMC7224173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nashwan</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>AbuJaber</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Harnessing the power of large language models (LLMs) for electronic health records (EHRs) optimization</article-title>
          <source>Cureus</source>
          <year>2023</year>
          <month>07</month>
          <day>29</day>
          <volume>15</volume>
          <issue>7</issue>
          <fpage>e42634</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37644945"/>
          </comment>
          <pub-id pub-id-type="doi">10.7759/cureus.42634</pub-id>
          <pub-id pub-id-type="medline">37644945</pub-id>
          <pub-id pub-id-type="pmcid">PMC10461074</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>PourNejatian</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Parisien</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Compas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Magoc</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Harle</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Lipori</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Hogan</surname>
              <given-names>WR</given-names>
            </name>
            <name name-style="western">
              <surname>Shenkman</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A large language model for electronic health records</article-title>
          <source>NPJ Digit Med</source>
          <year>2022</year>
          <month>12</month>
          <day>26</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>194</fpage>
          <pub-id pub-id-type="doi">10.1038/s41746-022-00742-2</pub-id>
          <pub-id pub-id-type="medline">36572766</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-022-00742-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC9792464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fontoura</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cull</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Beaulieu-Jones</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Kovacheva</surname>
              <given-names>VP</given-names>
            </name>
          </person-group>
          <article-title>Zero-shot interpretable phenotyping of postpartum hemorrhage using large language models</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <month>11</month>
          <day>30</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>212</fpage>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00957-x</pub-id>
          <pub-id pub-id-type="medline">38036723</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00957-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC10689487</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hartmann</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schwenzow</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Witte</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The political ideology of conversational AI: converging evidence on ChatGPT's pro-environmental, left-libertarian orientation</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on January 5, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2301.01768"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2301.01768</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ganguli</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lovitt</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Askell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kadavath</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schiefer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ndousse</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bowman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Conerly</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>DasSarma</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Drain</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Elhage</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>El-Showk</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fort</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hatfield-Dodds</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Henighan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hume</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Johnston</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kravec</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Olsson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ringer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tran-Johnson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Joseph</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>McCandlish</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Olah</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Red teaming language models to reduce harms: methods, scaling behaviors, and lessons learned</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on August 23, 2022</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2209.07858"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2209.07858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Perspectives on the social impacts of reinforcement learning with human feedback</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on March 6, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2303.02891"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2303.02891</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bhatt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Talukdar</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dave</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Prabhakaran</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Re-contextualizing fairness in NLP: the case of India</article-title>
          <source>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing</source>
          <year>2022</year>
          <conf-name>AACL-IJCNLP 2022</conf-name>
          <conf-date>November 21-23, 2022</conf-date>
          <conf-loc>Taipei, Taiwan</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2022.aacl-main.55</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ariyanayagam</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Gender bias in medical education: stop treating it as an inevitability</article-title>
          <source>Med Educ</source>
          <year>2020</year>
          <month>09</month>
          <volume>54</volume>
          <issue>9</issue>
          <fpage>863</fpage>
          <pub-id pub-id-type="doi">10.1111/medu.14186</pub-id>
          <pub-id pub-id-type="medline">32324942</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verdonk</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Benschop</surname>
              <given-names>YW</given-names>
            </name>
            <name name-style="western">
              <surname>de Haes</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Lagro-Janssen</surname>
              <given-names>TL</given-names>
            </name>
          </person-group>
          <article-title>From gender bias to gender awareness in medical education</article-title>
          <source>Adv Health Sci Educ Theory Pract</source>
          <year>2009</year>
          <month>03</month>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>135</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1007/s10459-008-9100-z</pub-id>
          <pub-id pub-id-type="medline">18274877</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>DD</given-names>
            </name>
          </person-group>
          <article-title>Examining the unconscious racial biases and attitudes of physicians, nurses, and the public: implications for future health care education and practice</article-title>
          <source>Health Equity</source>
          <year>2022</year>
          <month>05</month>
          <day>18</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>375</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35651358"/>
          </comment>
          <pub-id pub-id-type="doi">10.1089/heq.2021.0141</pub-id>
          <pub-id pub-id-type="medline">35651358</pub-id>
          <pub-id pub-id-type="pii">10.1089/heq.2021.0141</pub-id>
          <pub-id pub-id-type="pmcid">PMC9148656</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Vasquez</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Getrich</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Kano</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Boursaw</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Krabbenhoft</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sussman</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>Racial/gender biases in student clinical decision-making: a mixed-method study of medical school attributes associated with lower incidence of biases</article-title>
          <source>J Gen Intern Med</source>
          <year>2018</year>
          <month>12</month>
          <volume>33</volume>
          <issue>12</issue>
          <fpage>2056</fpage>
          <lpage>64</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29998436"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-018-4543-2</pub-id>
          <pub-id pub-id-type="medline">29998436</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-018-4543-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6258638</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Omiye</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Lester</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Spichak</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rotemberg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Daneshjou</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Large language models propagate race-based medicine</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <month>10</month>
          <day>20</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>195</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-023-00939-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00939-z</pub-id>
          <pub-id pub-id-type="medline">37864012</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00939-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC10589311</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corsino</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Railey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ostrovsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pinheiro</surname>
              <given-names>SO</given-names>
            </name>
            <name name-style="western">
              <surname>McGhan-Johnson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Padilla</surname>
              <given-names>BI</given-names>
            </name>
          </person-group>
          <article-title>The impact of racial bias in patient care and medical education: let's focus on the educator</article-title>
          <source>MedEdPORTAL</source>
          <year>2021</year>
          <month>09</month>
          <day>02</day>
          <volume>17</volume>
          <fpage>11183</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34557589"/>
          </comment>
          <pub-id pub-id-type="doi">10.15766/mep_2374-8265.11183</pub-id>
          <pub-id pub-id-type="medline">34557589</pub-id>
          <pub-id pub-id-type="pii">11183</pub-id>
          <pub-id pub-id-type="pmcid">PMC8410857</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abd-Alrazaq</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>AlSaad</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Alhuwail</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Healy</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Latifi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aziz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Damseh</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Alabed Alrazak</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sheikh</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Large language models in medical education: opportunities, challenges, and future directions</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <month>06</month>
          <day>01</day>
          <volume>9</volume>
          <fpage>e48291</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e48291/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/48291</pub-id>
          <pub-id pub-id-type="medline">37261894</pub-id>
          <pub-id pub-id-type="pii">v9i1e48291</pub-id>
          <pub-id pub-id-type="pmcid">PMC10273039</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zack</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Suzgun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Gichoya</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jurafsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Abdulnour</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Assessing the potential of GPT-4 to perpetuate racial and gender biases in health care: a model evaluation study</article-title>
          <source>Lancet Digit Health</source>
          <year>2024</year>
          <month>01</month>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e12</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(23)00225-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(23)00225-X</pub-id>
          <pub-id pub-id-type="medline">38123252</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(23)00225-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Whelton</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Aronow</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Casey</surname>
              <given-names>DE Jr</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dennison Himmelfarb</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>DePalma</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Gidding</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jamerson</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>MacLaughlin</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Muntner</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ovbiagele</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>SC Jr</given-names>
            </name>
            <name name-style="western">
              <surname>Spencer</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Stafford</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Taler</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>KA Sr</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>JT Jr</given-names>
            </name>
          </person-group>
          <article-title>2017 ACC/AHA/AAPA/ABC/ACPM/AGS/APhA/ASH/ASPC/NMA/PCNA Guideline for the prevention, detection, evaluation, and management of high blood pressure in adults: a report of the American College of Cardiology/American Heart Association Task Force on Clinical Practice Guidelines</article-title>
          <source>Circulation</source>
          <year>2018</year>
          <month>10</month>
          <day>23</day>
          <volume>138</volume>
          <issue>17</issue>
          <fpage>e426</fpage>
          <lpage>83</lpage>
          <pub-id pub-id-type="doi">10.1161/CIR.0000000000000597</pub-id>
          <pub-id pub-id-type="medline">30354655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Raymond</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Punjani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Muravov</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lyles</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Horton</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of amyotrophic lateral sclerosis in the United States, 2018</article-title>
          <source>Amyotroph Lateral Scler Frontotemporal Degener</source>
          <year>2023</year>
          <month>08</month>
          <day>21</day>
          <volume>24</volume>
          <fpage>1</fpage>
          <lpage>7</lpage>
          <comment>(forthcoming)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/abs/10.1080/21678421.2023.2245858?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/></comment>
          <pub-id pub-id-type="doi">10.1080/21678421.2023.2245858</pub-id>
          <pub-id pub-id-type="medline">37602649</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fingar</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Mabry-Hernandez</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ngo-Metzger</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wolff</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Steiner</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Elixhauser</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Delivery hospitalizations involving preeclampsia and eclampsia, 2005–2014</article-title>
          <source>Healthcare Cost and Utilization Project (HCUP) Statistical Briefs [Internet]</source>
          <year>2017</year>
          <publisher-loc>Rockville, MD</publisher-loc>
          <publisher-name>Agency for Healthcare Research and Quality</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <article-title>Fast facts: HIV in the US by race and ethnicity</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2024</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/hiv/group/racialethnic/other-races/diagnoses.html">https://www.cdc.gov/hiv/group/racialethnic/other-races/diagnoses.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <article-title>Reported Tuberculosis in the United States, 2020</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2025-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://archive.cdc.gov/#/details?q=Reported%20TB%20in%20the%20US%202020&amp;start=0&amp;rows=10&amp;url=https://www.cdc.gov/tb/statistics/reports/2020/Exec_Commentary.html">https://tinyurl.com/5n7rajsc</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baughman</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Field</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Costabel</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Crystal</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Culver</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Drent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Judson</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Wolff</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Sarcoidosis in America. Analysis based on health care use</article-title>
          <source>Ann Am Thorac Soc</source>
          <year>2016</year>
          <month>08</month>
          <volume>13</volume>
          <issue>8</issue>
          <fpage>1244</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1513/AnnalsATS.201511-760OC</pub-id>
          <pub-id pub-id-type="medline">27509154</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <article-title>Primary and secondary syphilis — reported cases and rates of reported cases by age group and sex, 2017–2021</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2024-01-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://web.archive.org/web/20231120122402/https://www.cdc.gov/std/statistics/2021/tables/15.htm">https://web.archive.org/web/20231120122402/https://www.cdc.gov/std/statistics/2021/tables/15.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>O'Neil</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>NF</given-names>
            </name>
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>HK</given-names>
            </name>
          </person-group>
          <article-title>Prostate cancer incidence and survival, by stage and race/ethnicity - United States, 2001-2017</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2020</year>
          <month>10</month>
          <day>16</day>
          <volume>69</volume>
          <issue>41</issue>
          <fpage>1473</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6941a1</pub-id>
          <pub-id pub-id-type="medline">33056955</pub-id>
          <pub-id pub-id-type="pmcid">PMC7561091</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Izmirly</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Ferucci</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Somers</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Drenkard</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dall'Era</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McCune</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Helmick</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Parton</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Incidence rates of systemic lupus erythematosus in the USA: estimates from a meta-analysis of the Centers for Disease Control and Prevention national lupus registries</article-title>
          <source>Lupus Sci Med</source>
          <year>2021</year>
          <month>12</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>e000614</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://lupus.bmj.com/lookup/pmidlookup?view=long&amp;pmid=34921094"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/lupus-2021-000614</pub-id>
          <pub-id pub-id-type="medline">34921094</pub-id>
          <pub-id pub-id-type="pii">8/1/e000614</pub-id>
          <pub-id pub-id-type="pmcid">PMC8685969</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>MZ</given-names>
            </name>
          </person-group>
          <article-title>Racial and gender trends in infective endocarditis related deaths in United States (2004-2017)</article-title>
          <source>Am J Cardiol</source>
          <year>2020</year>
          <month>08</month>
          <day>15</day>
          <volume>129</volume>
          <fpage>125</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amjcard.2020.05.037</pub-id>
          <pub-id pub-id-type="medline">32600782</pub-id>
          <pub-id pub-id-type="pii">S0002-9149(20)30547-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Wagle</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Cercek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Jemal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Colorectal cancer statistics, 2023</article-title>
          <source>CA Cancer J Clin</source>
          <year>2023</year>
          <volume>73</volume>
          <issue>3</issue>
          <fpage>233</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.3322/caac.21772"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/caac.21772</pub-id>
          <pub-id pub-id-type="medline">36856579</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burton</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Flannery</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Farley</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Gershman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Lynfield</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Petit</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Reingold</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Schaffner</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Plikaytis</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>CE Jr</given-names>
            </name>
            <name name-style="western">
              <surname>Whitney</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Schuchat</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Socioeconomic and racial/ethnic disparities in the incidence of bacteremic pneumonia among US adults</article-title>
          <source>Am J Public Health</source>
          <year>2010</year>
          <month>10</month>
          <volume>100</volume>
          <issue>10</issue>
          <fpage>1904</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.2105/AJPH.2009.181313</pub-id>
          <pub-id pub-id-type="medline">20724687</pub-id>
          <pub-id pub-id-type="pii">AJPH.2009.181313</pub-id>
          <pub-id pub-id-type="pmcid">PMC2936986</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kawatkar</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriel</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Secular trends in the incidence and prevalence of rheumatoid arthritis within members of an integrated health care delivery system</article-title>
          <source>Rheumatol Int</source>
          <year>2019</year>
          <month>03</month>
          <volume>39</volume>
          <issue>3</issue>
          <fpage>541</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1007/s00296-018-04235-y</pub-id>
          <pub-id pub-id-type="medline">30656412</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00296-018-04235-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hittle</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Culpepper</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Langer-Gould</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marrie</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Cutter</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Kaye</surname>
              <given-names>WE</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>LaRocca</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Wallin</surname>
              <given-names>MT</given-names>
            </name>
          </person-group>
          <article-title>Population-based estimates for the prevalence of multiple sclerosis in the United States by race, ethnicity, age, sex, and geographic region</article-title>
          <source>JAMA Neurol</source>
          <year>2023</year>
          <month>07</month>
          <day>01</day>
          <volume>80</volume>
          <issue>7</issue>
          <fpage>693</fpage>
          <lpage>701</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37184850"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamaneurol.2023.1135</pub-id>
          <pub-id pub-id-type="medline">37184850</pub-id>
          <pub-id pub-id-type="pii">2805038</pub-id>
          <pub-id pub-id-type="pmcid">PMC10186207</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <article-title>United States cancer statistics: data visualizations</article-title>
          <source>US Centers for Disease Control and Prevention</source>
          <access-date>2024-04-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://gis.cdc.gov/Cancer/USCS/#/AtAGlance/">https://gis.cdc.gov/Cancer/USCS/#/AtAGlance/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zaghlol</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dey</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Desale</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Barac</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Racial differences in takotsubo cardiomyopathy outcomes in a large nationwide sample</article-title>
          <source>ESC Heart Fail</source>
          <year>2020</year>
          <month>06</month>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>1056</fpage>
          <lpage>63</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32147963"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/ehf2.12664</pub-id>
          <pub-id pub-id-type="medline">32147963</pub-id>
          <pub-id pub-id-type="pmcid">PMC7261569</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kruszon-Moran</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Paulose-Ram</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Barker</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>McQuillan</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Prevalence and trends in hepatitis B virus infection in the United States, 2015-2018</article-title>
          <source>NCHS Data Brief</source>
          <year>2020</year>
          <month>03</month>
          <issue>361</issue>
          <fpage>1</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cdc.gov/nchs/data/databriefs/db361-h.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">32487291</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>COVID data tracker</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2024-04-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://covid.cdc.gov/covid-data-tracker">https://covid.cdc.gov/covid-data-tracker</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bruzelius</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Scarpa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Basu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Faghmous</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Baum</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Huntington's disease in the United States: variation by demographic and socioeconomic factors</article-title>
          <source>Mov Disord</source>
          <year>2019</year>
          <month>06</month>
          <volume>34</volume>
          <issue>6</issue>
          <fpage>858</fpage>
          <lpage>65</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30868663"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/mds.27653</pub-id>
          <pub-id pub-id-type="medline">30868663</pub-id>
          <pub-id pub-id-type="pmcid">PMC6579693</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <article-title>Major depression</article-title>
          <source>National Institute of Mental Health</source>
          <access-date>2024-04-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nimh.nih.gov/health/statistics/major-depression">https://www.nimh.nih.gov/health/statistics/major-depression</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hendrycks</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Burns</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Basart</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mazeika</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Steinhardt</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Measuring massive multitask language understanding</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on September 7, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2009.03300"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2009.03300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>C-EVAL: a multi-level multi-discipline Chinese evaluation suite for foundation models</article-title>
          <source>Proceedings of the 37th International Conference on Neural Information Processing Systems</source>
          <year>2023</year>
          <conf-name>NIPS '23</conf-name>
          <conf-date>December 10-16, 2023</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cimpian</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Based on billions of words on the internet, people = men</article-title>
          <source>Sci Adv</source>
          <year>2022</year>
          <month>04</month>
          <volume>8</volume>
          <issue>13</issue>
          <fpage>eabm2463</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.science.org/doi/10.1126/sciadv.abm2463?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/sciadv.abm2463</pub-id>
          <pub-id pub-id-type="medline">35363515</pub-id>
          <pub-id pub-id-type="pmcid">PMC10938580</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holgate</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McWilliam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Viana</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Winston</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Teo</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Extracting epilepsy patient data with Llama 2</article-title>
          <source>Proceedings of the 23rd Workshop on Biomedical Natural Language Processing</source>
          <year>2024</year>
          <conf-name>BioNLP 2024</conf-name>
          <conf-date>August 16, 2024</conf-date>
          <conf-loc>Bangkok, Thailand</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2024.bionlp-1.43</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bhattarai</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>IY</given-names>
            </name>
            <name name-style="western">
              <surname>Sierra</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Payne</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Abrams</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Leveraging GPT-4 for identifying cancer phenotypes in electronic health records: a performance comparison between GPT-4, GPT-3.5-turbo, Flan-T5, Llama-3-8B, and spaCy's rule-based and machine learning-based methods</article-title>
          <source>JAMIA Open</source>
          <year>2024</year>
          <month>07</month>
          <day>03</day>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>ooae060</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38962662"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooae060</pub-id>
          <pub-id pub-id-type="medline">38962662</pub-id>
          <pub-id pub-id-type="pii">ooae060</pub-id>
          <pub-id pub-id-type="pmcid">PMC11221943</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ioanovici</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Talby</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Buijs</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Lexicans at chemotimelines 2024: chemotimeline chronicles - leveraging large language models (LLMs) for temporal relations extraction in oncological electronic health records</article-title>
          <source>Proceedings of the 6th Clinical Natural Language Processing Workshop</source>
          <year>2024</year>
          <conf-name>ClinicalNLP 2024</conf-name>
          <conf-date>June 21, 2024</conf-date>
          <conf-loc>Mexico City, Mexico</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2024.clinicalnlp-1.38</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lilli</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Patarnello</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Masciocchi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Masiello</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Marazzi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Luca</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Capocchiano</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>LlamaMTS: optimizing metastasis detection with Llama instruction tuning and BERT-based ensemble in Italian clinical reports</article-title>
          <source>Proceedings of the 6th Clinical Natural Language Processing Workshop</source>
          <year>2024</year>
          <conf-name>ClinicalNLP 2024</conf-name>
          <conf-date>June 21, 2024</conf-date>
          <conf-loc>Mexico City, Mexico</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2024.clinicalnlp-1.13</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
