<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i11e31337</article-id>
      <article-id pub-id-type="pmid">34581671</article-id>
      <article-id pub-id-type="doi">10.2196/31337</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Predicting COVID-19–Related Health Care Resource Utilization Across a Statewide Patient Population: Model Development Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Basch</surname>
            <given-names>Corey</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Khurshid</surname>
            <given-names>Anjum</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Nagavally</surname>
            <given-names>Sneha</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mircheva</surname>
            <given-names>Iskra</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Xie</surname>
            <given-names>Zidian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Kasturi</surname>
            <given-names>Suranga N</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Regenstrief Institute</institution>
            <addr-line>1101 W 10th St</addr-line>
            <addr-line>Indianapolis, IN, 46202</addr-line>
            <country>United States</country>
            <phone>1 (317) 274 9000</phone>
            <email>snkasthu@iu.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6630-4598</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Park</surname>
            <given-names>Jeremy</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6551-1034</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wild</surname>
            <given-names>David</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7821-3872</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Khan</surname>
            <given-names>Babar</given-names>
          </name>
          <degrees>MD, MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4507-6968</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Haggstrom</surname>
            <given-names>David A</given-names>
          </name>
          <degrees>MD, MAS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3637-6950</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Grannis</surname>
            <given-names>Shaun</given-names>
          </name>
          <degrees>MD, MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8093-6639</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Regenstrief Institute</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Pediatrics</institution>
        <institution>Indiana University School of Medicine</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Luddy School of Informatics Computing and Engineering</institution>
        <institution>Indiana University</institution>
        <addr-line>Bloomington, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Medicine</institution>
        <institution>Indiana University School of Medicine</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Family Medicine</institution>
        <institution>Indiana University School of Medicine</institution>
        <addr-line>Indianapolis, IN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Suranga N Kasturi <email>snkasthu@iu.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>11</issue>
      <elocation-id>e31337</elocation-id>
      <history>
        <date date-type="received">
          <day>20</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>19</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>7</day>
          <month>9</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>9</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Suranga N Kasturi, Jeremy Park, David Wild, Babar Khan, David A Haggstrom, Shaun Grannis. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 15.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/11/e31337" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The COVID-19 pandemic has highlighted the inability of health systems to leverage existing system infrastructure in order to rapidly develop and apply broad analytical tools that could inform state- and national-level policymaking, as well as patient care delivery in hospital settings. The COVID-19 pandemic has also highlighted systemic disparities in health outcomes and access to care based on race or ethnicity, gender, income level, and urban-rural divide. Although the United States seems to be recovering from the COVID-19 pandemic owing to widespread vaccination efforts and increased public awareness, there is an urgent need to address the aforementioned challenges.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to inform the feasibility of leveraging broad, statewide datasets for population health–driven decision-making by developing robust analytical models that predict COVID-19–related health care resource utilization across patients served by Indiana’s statewide Health Information Exchange.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We leveraged comprehensive datasets obtained from the Indiana Network for Patient Care to train decision forest-based models that can predict patient-level need of health care resource utilization. To assess these models for potential biases, we tested model performance against subpopulations stratified by age, race or ethnicity, gender, and residence (urban vs rural).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>For model development, we identified a cohort of 96,026 patients from across 957 zip codes in Indiana, United States. We trained the decision models that predicted health care resource utilization by using approximately 100 of the most impactful features from a total of 1172 features created. Each model and stratified subpopulation under test reported precision scores &#62;70%, accuracy and area under the receiver operating characteristic curve scores &#62;80%, and sensitivity scores approximately &#62;90%. We noted statistically significant variations in model performance across stratified subpopulations identified by age, race or ethnicity, gender, and residence (urban vs rural).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study presents the possibility of developing decision models capable of predicting patient-level health care resource utilization across a broad, statewide region with considerable predictive performance. However, our models present statistically significant variations in performance across stratified subpopulations of interest. Further efforts are necessary to identify root causes of these biases and to rectify them.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>machine learning</kwd>
        <kwd>population health</kwd>
        <kwd>health care utilization</kwd>
        <kwd>health disparities</kwd>
        <kwd>health information</kwd>
        <kwd>epidemiology</kwd>
        <kwd>public health</kwd>
        <kwd>digital health</kwd>
        <kwd>health data</kwd>
        <kwd>pandemic</kwd>
        <kwd>decision models</kwd>
        <kwd>health informatics</kwd>
        <kwd>healthcare resources</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The COVID-19 pandemic has impacted the health and well-being of individuals, communities, and economies worldwide at an unprecedented scale [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. As of June 1, 2021, the COVID-19 pandemic has infected over 170 million people worldwide and claimed the lives of over 3.5 million people. In the United States alone, COVID-19 has infected over 33 million people and claimed over 600,000 lives. In addition to the loss of lives and other adverse health outcomes, the enforcement of preventative measures, such as lockdowns and mask-wearing mandates, has further affected the mental and physical well-being of individuals and communities. The cumulative financial costs of the COVID-19 pandemic caused by lost output and health reduction have been estimated at US $16 trillion, or approximately 90% of the annual gross domestic product of the United States [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>In the United States, the COVID-19 pandemic has highlighted (1) the inability of health systems to leverage existing system infrastructure in order to rapidly develop and apply broad analytical tools that could inform state- and national-level policymaking and patient care delivery in hospital settings and (2) systemic disparities in COVID-19–related outcomes and access to care based on race or ethnicity [<xref ref-type="bibr" rid="ref4">4</xref>], gender [<xref ref-type="bibr" rid="ref5">5</xref>], income level, and urban-rural divide [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. At the peak of the pandemic outbreak in the United States, these limitations contributed to distrust, misinformation, and lack of cohesive decision-making. This impeded local government and public health officials from making informed policy decisions, such as mask-wearing mandates and stay-at-home orders, to control disease outbreaks and safeguard health systems from extended strain. This led to shortages in hospital beds, personal protective equipment, and health care personnel, thereby causing significant disruptions to health care delivery and consequent loss of lives [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>Although the United States seems to be recovering from the COVID-19 pandemic owing to widespread vaccination efforts and increased public awareness, there is still a need to address the aforementioned limitations. Overcoming these limitations will ensure better disaster preparedness and response in anticipation of any future outbreaks caused by either COVID-19 variants or other diseases and to manage the care of vaccine-hesitant populations. The United States boasts significant health information system infrastructure, resulting in the active collection of a wide variety of patient-level clinical, medication, and visit history data. However, such datasets are often siloed across different health systems. As a result, analytical model development is often spearheaded at the health system level. Although such models may be useful in caring for a specific health system, they may not generalize across broader populations and cannot contribute to large-scale public health responses delivered across broad geographies, such as at the county, metropolitan area, or state level.</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>In this study, we sought to inform the feasibility of leveraging broad, statewide datasets for population health–driven decision-making by developing robust analytical models that predicted COVID-19–related health care resource utilization at the patient level among those served by Indiana’s statewide Health Information Exchange (HIE).</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Patient Population and Data Sources</title>
        <p>We leveraged the COVID-19 Research Data Commons (CoRDaCo) [<xref ref-type="bibr" rid="ref8">8</xref>], a rich, statewide dataset curated by the Regenstrief Institute of Indianapolis and Indiana University. The CoRDaCo dataset seeks to enable better access to data on COVID-19–positive patients for research purposes. It integrates data from multiple clinical sources, including the Indiana Network for Patient Care (INPC) [<xref ref-type="bibr" rid="ref9">9</xref>]—one of the longest continuously operated statewide HIEs in the United States consisting of data from over 15 million inhabitants of Indiana spread across 23 health systems and 93 hospitals, as well as other state laboratory reporting state vitals data. The INPC patient population represents a variety of health systems spread across Indiana [<xref ref-type="bibr" rid="ref10">10</xref>] (representation of COVID-19 patient dataset is illustrated in detail in the Results section). This is relevant given that Indiana is representative of the total US population in terms of age, gender, education levels [<xref ref-type="bibr" rid="ref11">11</xref>] and urban-rural divide [<xref ref-type="bibr" rid="ref12">12</xref>]. For each patient, CoRDaCo includes data captured between January 1, 2018, and November 30, 2020. The data pull was performed by specialized analysts from the Regenstrief Institute Data Core—the only personnel permitted direct access to identifiable patient data within the INPC research database.</p>
      </sec>
      <sec>
        <title>Preparation of Feature Sets</title>
        <p>We extracted and vectorized a wide variety of patient-level features representing their demographics; diagnoses; past encounter history; medications; and social determinants of health, defined as conditions in which people are born, grow, live, work and age [<xref ref-type="bibr" rid="ref13">13</xref>] (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <p>Creation of feature vectors for model development was performed by the authors using the Python programming language.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>List of features extracted for model development.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="750"/>
            <thead>
              <tr valign="top">
                <td>Data type</td>
                <td>Description of features modeled</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Demographics</td>
                <td>Patient age, gender, race or ethnicity represented as integer and categorical variables</td>
              </tr>
              <tr valign="top">
                <td>Diagnosis data</td>
                <td>Represented as integer variables:<break/><list list-type="bullet"><list-item><p>Charlson comorbidity index [<xref ref-type="bibr" rid="ref14">14</xref>]</p></list-item></list><break/>Represented as Boolean values:<break/><list list-type="bullet"><list-item><p>Presence of most commonly occurring chronic conditions [<xref ref-type="bibr" rid="ref15">15</xref>]</p></list-item><list-item><p>Diagnoses of addictions, behaviors, behavioral disorders, and narcotics use [<xref ref-type="bibr" rid="ref16">16</xref>]</p></list-item><list-item><p>Presence of 1000 most frequently reported diagnoses identified using the International Classification of Diseases</p></list-item></list></td>
              </tr>
              <tr valign="top">
                <td>Past encounter history</td>
                <td>Inpatient, outpatient, and emergency visits represented as counts</td>
              </tr>
              <tr valign="top">
                <td>Medications</td>
                <td>Medications categorized into diagnosis groups and represented as Boolean values</td>
              </tr>
              <tr valign="top">
                <td>Social determinants of health</td>
                <td>Represented as Boolean values:<break/><list list-type="bullet"><list-item><p>Socioeconomic status (unemployment, type of insurance)</p></list-item><list-item><p>Education</p></list-item><list-item><p>Neighborhood and physical environment</p></list-item><list-item><p>Urban vs rural status classified using Rural-Urban commuting area (RUCA) codes [<xref ref-type="bibr" rid="ref17">17</xref>]</p></list-item><list-item><p>Employment</p></list-item><list-item><p>Social support networks</p></list-item><list-item><p>Access to health care according to the Kaiser Family Foundation framework [<xref ref-type="bibr" rid="ref18">18</xref>]</p></list-item></list>All features were inferred using patient-level diagnosis codes and patient address information.</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Development of a Gold Standard</title>
        <p>We parsed past encounter history data on each patient to identify those who had been hospitalized (defined as patients who had been admitted to either inpatient or intensive care) within either of the following:</p>
        <list list-type="bullet">
          <list-item>
            <p>The first week of receiving a diagnosis of COVID-19 (ie, 1-week cohort), including a measure of which patients were in need of urgent care at the time of, or soon after, diagnosis.</p>
          </list-item>
          <list-item>
            <p>The first 6 weeks of receiving a diagnosis of COVID-19 (ie, 6-week cohort), a metric of which patients would need inpatient care during the course of their illness [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
          </list-item>
        </list>
        <p>To ensure that our gold standard focused on inpatient or intensive care unit stays influenced by COVID-19 alone, we applied regular expressions to patient admission reason notes in order to identify and exclude any admissions due to accidents such as falls, injuries, lacerations, and fractures, as well as suicidal ideation, overdoses, and alcohol abuse. These factors were selected for exclusion based on an assessment of the most frequently occurring admission reasons identified from patient hospitalization datasets.</p>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> represents our approach to feature vector preparation and detection of outcomes of interest for analytical modeling based on the patient’s longitudinal health history.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Feature vector preparation and detection of outcomes of interest based on the patient’s longitudinal health history.</p>
          </caption>
          <graphic xlink:href="jmir_v23i11e31337_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Machine Learning Process</title>
        <p>We leveraged Python and the scikit-learn machine learning library [<xref ref-type="bibr" rid="ref20">20</xref>] to train prediction models using the eXtreme Gradient Boosting (XGBoost) algorithm [<xref ref-type="bibr" rid="ref21">21</xref>]. The XGBoost algorithm is an implementation of gradient-boosted ensemble decision trees [<xref ref-type="bibr" rid="ref22">22</xref>] designed to optimize speed and performance. XGBoost classification was selected because research conducted by ourselves, as well as other external groups, found that ensemble decision trees performed comparably to, or better than, other classification algorithms [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>] and because XGBoost could be trained using a smaller number of features than those required to train neural networks and other deep learning–based models, which enables ease of model development, interpretability, and explainability. We split each data vector into random groups of 80% (training and validation dataset) and 20% (holdout test set). We then leveraged the 80% training and validation dataset to train optimal models for each scenario by using 10-fold cross-validation and hyperparameter tuning methods. To enable better generalization of each model, we applied the internal feature selection method of XGBoost [<xref ref-type="bibr" rid="ref25">25</xref>], which prioritizes feature importance based on average gain across all splits the feature is used in, to restrict models to a smaller subset of the most relevant features.</p>
      </sec>
      <sec>
        <title>Model Evaluation</title>
        <p>We assessed the performance of each decision model in the 20% holdout test dataset by using several performance metrics:</p>
        <list list-type="bullet">
          <list-item>
            <p>Positive predictive value, or <italic>precision</italic>: the likelihood that a positively identified case is truly positive.</p>
          </list-item>
          <list-item>
            <p>Sensitivity, or <italic>recall</italic>: the likelihood that a true positive case is correctly identified as positive.</p>
          </list-item>
          <list-item>
            <p>Specificity: the likelihood that a negative case is correctly identified as negative.</p>
          </list-item>
          <list-item>
            <p>F<sub>1</sub> score: the harmonic mean of model precision and recall scores.</p>
          </list-item>
          <list-item>
            <p>Accuracy: the likelihood that a prediction is correct.</p>
          </list-item>
          <list-item>
            <p>Area under the receiver operating characteristic curve (AUC-ROC): a metric representing the performance of a prediction model at all classification thresholds.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Evaluation of Analytical Performance Against Subpopulations</title>
        <p>As discussed previously, the COVID-19 pandemic has highlighted systemic disparities in patient outcomes and access to care based on race or ethnicity [<xref ref-type="bibr" rid="ref4">4</xref>], gender [<xref ref-type="bibr" rid="ref5">5</xref>], income level, and urban-rural divide [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. These disparities may be present in the datasets used to train analytical models, resulting in biased predictions that place privileged groups at a systematic advantage and unprivileged groups at a systematic disadvantage [<xref ref-type="bibr" rid="ref26">26</xref>]. To evaluate our models for such biases, we stratified the holdout test dataset by age, race or ethnicity, gender, and residence (urban vs rural), and we evaluated model performance across each stratified subpopulation by using the same performance metrics. <xref rid="figure2" ref-type="fig">Figure 2</xref> provides a comprehensive overview of our study approach.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Workflow presenting the complete study approach from data extraction to predictive model evaluation. CoRDaCo: COVID-19 Research Data Commons.</p>
          </caption>
          <graphic xlink:href="jmir_v23i11e31337_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Human Participants Research Approval</title>
        <p>This study was approved by Indiana University’s Institutional Review Board (2005573466).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>The CoRDaCo dataset consisted of 230,981 patients with a positive COVID-19 diagnosis. However, we noted that a considerable number of these patients were out-of-state residents who visited health systems that were part of the INPC only to obtain COVID-19 tests or were Indiana residents whose only interaction with INPC-affiliated health systems was to undergo COVID-19 testing. As such, we had no clinical data beyond COVID-19 status on these patients. To enrich the quality of datasets used for model building, we excluded such patients by identifying and removing any patient whose only INPC record was a positive COVID-19 test result. This resulted in a total of 96,115 patients. We excluded an additional 89 patients owing to errors in their medical records, resulting in a total of 96,026 <italic>legacy patients</italic> to be included in our model development efforts. This legacy population was racially and ethnically diverse (27% Black, Hispanic, and others), predominantly adult (median age 47 years [33.73]), mostly urban (76,988/96,026, 80.17%), and had a larger representation of females (57,475/96,026, 59.85%). A total of 18,694 (19.47%) of these patients were hospitalized during the first week of being diagnosed with COVID-19, whereas 22,678 (23.62%) were hospitalized during the first 6 weeks of receiving a COVID-19 diagnosis.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Characteristics of the patient populations used for analytical model development.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="370"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Patient characteristics</td>
                <td>COVID-19 patient cohort</td>
                <td>Patients hospitalized during the first week</td>
                <td>Patients hospitalized during the first 6 weeks</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Gender, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>38,529 (40.12)</td>
                <td>8178 (43.75)</td>
                <td>9615 (42.40)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>57,475 (59.85)</td>
                <td>10,516 (56.25)</td>
                <td>13,062 (57.60)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unknown</td>
                <td>22 (0.02)</td>
                <td>0 (0)</td>
                <td>1 (0)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Race or ethnicity, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White, non-Hispanic</td>
                <td>70,238 (73.15)</td>
                <td>11,783 (63.03)</td>
                <td>14,737 (64.98)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black, non-Hispanic</td>
                <td>12,372 (12.88)</td>
                <td>4104 (21.95)</td>
                <td>4666 (20.58)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic</td>
                <td>9882 (10.29)</td>
                <td>2171 (11.61)</td>
                <td>2533 (11.17)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td>3534 (3.68)</td>
                <td>636 (3.40)</td>
                <td>742 (3.27)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Age (years), n (%)<sup>a</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Minors (&#60;18 years)</td>
                <td>7064 (7.36)</td>
                <td>638 (3.41)</td>
                <td>754 (3.34)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Adults (18-65 years)</td>
                <td>67,563 (70.36)</td>
                <td>11,330 (60.61)</td>
                <td>13,851 (61.08)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Older adults (&#62;65 years)</td>
                <td>21,177 (22.05)</td>
                <td>6726 (35.98)</td>
                <td>8074 (35.60)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unknown</td>
                <td>222 (0.23)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Residence, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Number of zip codes represented</td>
                <td>957 (99.90)</td>
                <td>678 (70.85)</td>
                <td>705 (73.67)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Living in an urban area</td>
                <td>76,988 (80.17)</td>
                <td>14,833 (79.35)</td>
                <td>17,910 (78.98)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Living in a rural area</td>
                <td>16,843 (17.54)</td>
                <td>3267 (17.48)</td>
                <td>4084 (18.01)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unknown</td>
                <td>2195 (2.29)</td>
                <td>594 (3.18)</td>
                <td>684 (3.02)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Encounters, mean (SD)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Outpatient visits</td>
                <td>7.715 (10.09)</td>
                <td>9.391 (13.18)</td>
                <td>9.530 (12.29)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Emergency room visits</td>
                <td>0.926 (2.25)</td>
                <td>2.431 (3.52)</td>
                <td>2.237 (3.45)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hospitalizations</td>
                <td>0.339 (1.35)</td>
                <td>0.938 (2.24)</td>
                <td>0.875 (2.19)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Chronic disease burden, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cancer</td>
                <td>3976 (4.14)</td>
                <td>1226 (6.56)</td>
                <td>1484 (6.54)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Diabetes with complications</td>
                <td>4340 (4.52)</td>
                <td>1903 (10.18)</td>
                <td>2222 (9.80)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Diabetes without complications</td>
                <td>10,819 (11.27)</td>
                <td>3845 (20.57)</td>
                <td>4506 (19.87)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Dementia</td>
                <td>2529 (2.63)</td>
                <td>648 (3.47)</td>
                <td>871 (3.84)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Chronic pulmonary disease</td>
                <td>10,755 (11.20)</td>
                <td>2364 (12.65)</td>
                <td>4338 (19.13)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Renal disease</td>
                <td>5449 (5.67)</td>
                <td>2397 (12.82)</td>
                <td>2794 (12.32)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Mean participant age: 47.039 years (SD 21.43).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Model Development and Evaluation</title>
        <p>The feature preparation process (<xref ref-type="table" rid="table1">Table 1</xref>) resulted in a total of 1172 features for model training. To enable model generalizability and ease of interpretation, we restricted each model to approximately the 100 most significant features, selected based on feature importance threshold drop-offs. <xref ref-type="table" rid="table3">Table 3</xref> presents performance metrics reported by each model across the 20% holdout test dataset. <xref rid="figure3" ref-type="fig">Figure 3</xref> presents the precision-recall and AUC-ROC curves for each prediction model. The subset of features included in each model is presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>Both models delivered strong performance metrics. However, the model for the 1-week cohort reported significantly greater specificity, accuracy, and AUC-ROC scores than the 6-week cohort model.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Predictive model performance.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="340"/>
            <col width="330"/>
            <col width="330"/>
            <thead>
              <tr valign="top">
                <td>Performance metric</td>
                <td>First week (95% CI)</td>
                <td>First 6 weeks (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Precision</td>
                <td>75.133 (73.445-76.822)</td>
                <td>73.697 (72.142-75.253)</td>
              </tr>
              <tr valign="top">
                <td>Sensitivity</td>
                <td>52.505 (50.875-54.136)</td>
                <td>52.571 (51.081-54.061)</td>
              </tr>
              <tr valign="top">
                <td>Specificity</td>
                <td>95.780 (95.457-96.104)</td>
                <td>94.269 (93.887-94.653)</td>
              </tr>
              <tr valign="top">
                <td>Accuracy</td>
                <td>87.326 (86.846-87.806)</td>
                <td>84.514 (83.992-85.037)</td>
              </tr>
              <tr valign="top">
                <td>AUC-ROC<sup>a</sup></td>
                <td>88.744 (88.136-89.205)</td>
                <td>86.215 (85.773-87.091)</td>
              </tr>
              <tr valign="top">
                <td>F<sub>1</sub> score</td>
                <td>61.814 (60.092-63.535)</td>
                <td>61.367 (59.797-62.936)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>AUC-ROC: area under the receiver operating curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Precision-recall and area under the receiver operating curve (AUC-ROC) curves for each prediction model.</p>
          </caption>
          <graphic xlink:href="jmir_v23i11e31337_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Evaluation of Analytical Performance Against Subpopulations</title>
        <p>To assess model performance across different subpopulations of interest, we stratified the holdout test dataset by age, race or ethnicity, gender, and residence (urban vs rural), and we then evaluated the models on each stratum using each performance metric. <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref> present statistically significant variations in predictive performance reported by each model. Comprehensive predictive performance metrics, together with 95% CIs, are listed in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. AUC-ROC curves for the performance of models across each stratified subpopulation are presented in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Statistically significant performance variations in model to predict health care resource utilization within the first week.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="180"/>
            <col width="180"/>
            <col width="240"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Performance metric</td>
                <td>Urban vs rural</td>
                <td>Male vs female</td>
                <td>Minors vs adults vs older adults</td>
                <td>White vs Black vs Hispanic</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Precision</td>
                <td>No difference</td>
                <td>No difference</td>
                <td>No difference</td>
                <td>No difference</td>
              </tr>
              <tr valign="top">
                <td>Sensitivity or recall</td>
                <td>Urban &#62; rural</td>
                <td>Male &#62; female</td>
                <td>Older adults &#62; (adults = minors)</td>
                <td>Black &#62; Hispanic &#62; White</td>
              </tr>
              <tr valign="top">
                <td>Specificity</td>
                <td>No difference</td>
                <td>No difference</td>
                <td>Minors &#62; adults &#62; older adults</td>
                <td>(White and Hispanic) &#62; Black</td>
              </tr>
              <tr valign="top">
                <td>Accuracy</td>
                <td>No difference</td>
                <td>No difference</td>
                <td>Minors &#62; adults &#62; older adults</td>
                <td>(White and Hispanic) &#62; Black</td>
              </tr>
              <tr valign="top">
                <td>AUC-ROC<sup>a</sup></td>
                <td>No difference</td>
                <td>Male &#62; female</td>
                <td>Minors &#62; adults &#62; older adults</td>
                <td>No difference</td>
              </tr>
              <tr valign="top">
                <td>F<sub>1</sub> score</td>
                <td>Urban &#62; rural</td>
                <td>Male &#62; female</td>
                <td>(Older adults = minors) &#62; adults</td>
                <td>(Black and Hispanic) &#62; White</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>AUC-ROC: area under the receiver operating curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Statistically significant performance variations in model to predict health care resource utilization within the first 6 weeks.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="180"/>
            <col width="180"/>
            <col width="240"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Performance metric</td>
                <td>Urban vs rural</td>
                <td>Male vs female</td>
                <td>Minors vs adults vs older adults</td>
                <td>White vs Black vs Hispanic</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Precision</td>
                <td>No difference</td>
                <td>No difference</td>
                <td>No difference</td>
                <td>No difference</td>
              </tr>
              <tr valign="top">
                <td>Sensitivity or recall</td>
                <td>Urban &#62; rural</td>
                <td>Male &#62; female</td>
                <td>Older adults &#62; (adults = minors)</td>
                <td>Black &#62; Hispanic &#62; White</td>
              </tr>
              <tr valign="top">
                <td>Specificity</td>
                <td>No difference</td>
                <td>No difference</td>
                <td>Minors &#62; adults &#62; older adults</td>
                <td>(White and Hispanic) &#62; Black</td>
              </tr>
              <tr valign="top">
                <td>Accuracy</td>
                <td>No difference</td>
                <td>No difference</td>
                <td>Minors &#62; adults &#62; older adults</td>
                <td>No difference</td>
              </tr>
              <tr valign="top">
                <td>AUC-ROC<sup>a</sup></td>
                <td>Urban &#62; rural</td>
                <td>Male &#62; female</td>
                <td>Minors &#62; adults &#62; older adults</td>
                <td>No difference</td>
              </tr>
              <tr valign="top">
                <td>F<sub>1</sub> score</td>
                <td>Urban &#62; rural</td>
                <td>Male &#62; female</td>
                <td>(Older adults = minors) &#62; adults</td>
                <td>Black &#62; Hispanic &#62; White</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>AUC-ROC: area under the receiver operating curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>As presented in <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>, there were no statistically significant differences in precision scores reported across any stratum or model under test. However, we found evidence of significant variations in model performance across many other strata. Across both models and all performance metrics under test, residing in an urban area was associated with comparable or higher predictive performance than residing in a rural area. Across both models and all performance metrics under test, being male was associated with comparable or higher predictive performance than being female. Performance stratified by age showed significant variations, with some performance metrics favoring older adults while others favored minors. These results are indicative of biases learned from underlying data sources used for model development, or of inefficient learning parameters implemented by the machine learning algorithm.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our results demonstrate the ability to train decision models capable of predicting the need for COVID-19–related hospitalization across a broad, statewide patient population with considerable performance accuracy. The 1-week model for predicting the need for COVID-19–related hospitalization reported specificity, accuracy, and AUC-ROC scores that were significantly higher than those of the 6-week model. The findings are intuitive given that hospitalization risk is more predictable over shorter time frames. Such utilization prediction models may be used for population health management programs in health systems, to identify high-risk populations to monitor or screen, as well as to predict resource need in crisis situations, such as future spikes in pandemic activity or outbreaks.</p>
        <p>Stratification of model performance across age, race or ethnicity, gender, and urban versus rural divide identified statistically significant variations in model performance across subpopulations. Each model and stratified subpopulation under test reported precision scores &#62;70%, accuracy and AUC-ROC scores &#62;80%, and specificity scores approximately &#62;90%. We note that recall scores for each model (approximately 50%-54%) were lower than ideal, implying that a considerable proportion of patients in need of health care services were being missed. However, model precision, which is indicative of what percentage of patients identified by the model actually needed care, was high (&#62;70%), suggesting that it was pragmatic for use in clinical settings. Additionally, model specificity scores were very high (approximately &#62;90%). This finding indicated that the models were able to correctly identify patients who were not in need of care with very high accuracy, which is very valuable in making clinical decisions on which patients to prioritize.</p>
        <p>Features that influenced the prediction of health care resource utilization included patient age [<xref ref-type="bibr" rid="ref27">27</xref>], chronic obstructive pulmonary disease status [<xref ref-type="bibr" rid="ref28">28</xref>], smoking [<xref ref-type="bibr" rid="ref28">28</xref>], diabetes [<xref ref-type="bibr" rid="ref29">29</xref>], indication of neurological diseases via diagnosis (eg, dementia [<xref ref-type="bibr" rid="ref30">30</xref>]) or medications (eg, anti-Parkinson and related therapy agents), mental disorders (eg, anxiety disorders), residence (urban vs rural) [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>], and income-level, measured on the basis of the type of insurance used by the patient. None of the patient-level social determinants of health factors extracted from the International Classification of Diseases diagnosis data were found to be impactful enough for inclusion in either model. This could be attributed to the scarcity of these elements being captured in clinical settings. However, patient-level features on the type of insurance (which is indicative of an individual’s financial and employment status) and RUCA code (which could be used to infer an individual’s income level, isolation, and access to services and health resources) were both widely available. These elements were found to be impactful and were integrated into both models.</p>
        <p>Each model exhibited significant variations in predictive performance across subpopulations. Overall, male gender or living in an urban area was associated with stronger predictive performance. These differences may be influenced by variations in access to health care services or health care delivery prevalent in the datasets, and the models could learn them during the training process. We cannot make further assumptions on the causes of varying model predictions without a proper assessment of underlying causes of this behavior.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>We noted several limitations to this study. We leveraged statewide datasets from the INPC HIE system to ensure that our models could be operationalized across a broad geographic region. As such, our modeling did not include data elements that were collected by health systems but not shared with the INPC. Since the collection of such datasets and their availability at the HIE level may vary based on the health system, the inclusion of such elements may impact the generalizability of our models across different health systems. Our use cases assessed the need for hospitalization during the first 6 weeks after diagnosis. This excludes the needs of patients suffering from long COVID, where patients may not fully recover for several months [<xref ref-type="bibr" rid="ref33">33</xref>]. Models were trained using <italic>legacy patients</italic>, who were participants of the INPC system prior to March 1, 2020. It is unclear how the models will perform against other patients who do not regularly interact with the health system and sought care only for COVID-19 testing purposes. This is concerning given that such patients may suffer from a higher disease burden. Our modeling efforts covered a broad time period spanning several waves of the COVID-19 pandemic, as well as the enforcement and relaxation of various mandates aimed at controlling COVID-19 infection rates. These changes may have influenced the capacity of hospital systems, resulting in changes in how many patients were provided inpatient care. Alternatively, hospital admission and emergency management protocols may have also changed throughout this period, further impacting which patients received care. Our current effort did not consider how these variations influence the training datasets, and as such, how our models would generalize across future outbreaks and mandates, as COVID-19 infection rates continue to change. Future research will systematically investigate and calibrate model performance across different stages of the pandemic.</p>
        <p>We sought to demonstrate the ability to develop broad, state-level models for COVID-19–related research. As such, the biases in analytical models detected in this study highlight significant concerns that researchers must guard against. These biases in analytical model performance will be addressed during the next phase of our work. Further, although the generalizability of our models across other states is untested, they can influence other emerging COVID-19 analytical efforts. In particular, these models can influence data collection, curation, and modeling activities undertaken by the National COVID Cohort Collaborative (N3C) [<xref ref-type="bibr" rid="ref34">34</xref>], which is stewarded by the National Center for Advancing Translational Sciences and hosts data on over 250,000 COVID-19–positive patients from 31 sites spread across the United States. N3C could serve as an in vivo laboratory for our research efforts.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study presents the possibility of developing decision models capable of predicting patient-level health care resource utilization across a broad, statewide region with considerable predictive performance. However, the analytical models present statistically significant variations in performance across stratified subpopulations of interest. Further efforts are necessary to identify root causes of these biases and to rectify them.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>List of top-ranking features included in each predictive model.</p>
        <media xlink:href="jmir_v23i11e31337_app1.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Performance metrics reported by each analytical model across each stratified subpopulation of the study.</p>
        <media xlink:href="jmir_v23i11e31337_app2.docx" xlink:title="DOCX File , 18 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Area under the receiver operating curves (AUC-ROCs) for the performance of models across each stratified subpopulation.</p>
        <media xlink:href="jmir_v23i11e31337_app3.png" xlink:title="PNG File , 261 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC-ROC</term>
          <def>
            <p>Area under the receiver operating curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CoRDaCo</term>
          <def>
            <p>COVID-19 Research Data Commons</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">HIE</term>
          <def>
            <p>Health Information Exchange</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">INPC</term>
          <def>
            <p>Indiana Network for Patient Care</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">N3C</term>
          <def>
            <p>National COVID Cohort Collaborative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RUCA</term>
          <def>
            <p>Rural-Urban Commuting Area</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">XGBoost</term>
          <def>
            <p>eXtreme Gradient Boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared. </p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Estiri</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Strasser</surname>
              <given-names>ZH</given-names>
            </name>
            <name name-style="western">
              <surname>Klann</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Naseri</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wagholikar</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
          </person-group>
          <article-title>Predicting COVID-19 mortality with electronic medical records</article-title>
          <source>NPJ Digit Med</source>
          <year>2021</year>
          <month>03</month>
          <day>04</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-021-00383-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-021-00383-x</pub-id>
          <pub-id pub-id-type="medline">33542473</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-021-00383-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC7862405</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hartley</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Perencevich</surname>
              <given-names>EN</given-names>
            </name>
          </person-group>
          <article-title>Public Health Interventions for COVID-19: Emerging Evidence and Implications for an Evolving Public Health Crisis</article-title>
          <source>JAMA</source>
          <year>2020</year>
          <month>05</month>
          <day>19</day>
          <volume>323</volume>
          <issue>19</issue>
          <fpage>1908</fpage>
          <lpage>1909</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2020.5910</pub-id>
          <pub-id pub-id-type="medline">32275299</pub-id>
          <pub-id pub-id-type="pii">2764656</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Haase</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>XQ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>BX</given-names>
            </name>
          </person-group>
          <article-title>The experiences of health-care providers during the COVID-19 crisis in China: a qualitative study</article-title>
          <source>The Lancet Global Health</source>
          <year>2020</year>
          <month>06</month>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>e790</fpage>
          <lpage>e798</lpage>
          <pub-id pub-id-type="doi">10.1016/s2214-109x(20)30204-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Dorn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cooney</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Sabin</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 exacerbating inequalities in the US</article-title>
          <source>The Lancet</source>
          <year>2020</year>
          <month>04</month>
          <volume>395</volume>
          <issue>10232</issue>
          <fpage>1243</fpage>
          <lpage>1244</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(20)30893-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>MK</given-names>
            </name>
          </person-group>
          <article-title>Gender inequalities during COVID-19</article-title>
          <source>Group Processes &#38; Intergroup Relations</source>
          <year>2021</year>
          <month>03</month>
          <day>04</day>
          <volume>24</volume>
          <issue>2</issue>
          <fpage>237</fpage>
          <lpage>245</lpage>
          <pub-id pub-id-type="doi">10.1177/1368430220984248</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Iyanda</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Boakye</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Oppong</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Racial/ethnic heterogeneity and rural-urban disparity of COVID-19 case fatality ratio in the USA: a negative binomial and GIS-based analysis</article-title>
          <source>J Racial Ethn Health Disparities</source>
          <year>2021</year>
          <month>03</month>
          <day>26</day>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33638102"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40615-021-01006-7</pub-id>
          <pub-id pub-id-type="medline">33638102</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40615-021-01006-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC7909733</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Derakhshan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cutter</surname>
              <given-names>SL</given-names>
            </name>
          </person-group>
          <article-title>Urban-rural differences in COVID-19 exposures and outcomes in the South: A preliminary analysis of South Carolina</article-title>
          <source>PLoS One</source>
          <year>2021</year>
          <month>02</month>
          <day>03</day>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>e0246548</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0246548"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0246548</pub-id>
          <pub-id pub-id-type="medline">33534870</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-31728</pub-id>
          <pub-id pub-id-type="pmcid">PMC7857563</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <article-title>Regenstrief – IU partnership offers fast, secure access to COVID-19 data for research</article-title>
          <source>Regenstrief Institute</source>
          <year>2021</year>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.regenstrief.org/article/regenstrief-iu-cordaco-partnership-announcement/">https://www.regenstrief.org/article/regenstrief-iu-cordaco-partnership-announcement/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Overhage</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schadow</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Blevins</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dexter</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Mamlin</surname>
              <given-names>B</given-names>
            </name>
            <collab>INPC Management Committee</collab>
          </person-group>
          <article-title>The Indiana network for patient care: a working local health information infrastructure. An example of a working infrastructure collaboration that links data from five health systems and hundreds of millions of entries</article-title>
          <source>Health Aff (Millwood)</source>
          <year>2005</year>
          <month>09</month>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>1214</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1377/hlthaff.24.5.1214</pub-id>
          <pub-id pub-id-type="medline">16162565</pub-id>
          <pub-id pub-id-type="pii">24/5/1214</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <source>Indiana Health Information Exchange – Overview</source>
          <year>2017</year>
          <month>03</month>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ihie.org/wp-content/uploads/2017/05/IHIE-Overview_March-2017.pdf">https://www.ihie.org/wp-content/uploads/2017/05/IHIE-Overview_March-2017.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <article-title>Quick Facts, United States</article-title>
          <source>United States Census Bureau</source>
          <year>2021</year>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.census.gov/quickfacts/fact/table/US/RHI525219">https://www.census.gov/quickfacts/fact/table/US/RHI525219</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <article-title>State Fact Sheets</article-title>
          <source>Economic Research Service - U.S. Department of Agriculture</source>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ers.usda.gov/data-products/state-fact-sheets/">https://www.ers.usda.gov/data-products/state-fact-sheets/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <article-title>Social determinants of health</article-title>
          <source>World Health Organization</source>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/social_determinants/sdh_definition/en/">https://www.who.int/social_determinants/sdh_definition/en/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charlson</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Charlson</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Marinopoulos</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Briggs</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Hollenberg</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>The Charlson comorbidity index is adapted to predict costs of chronic disease in primary care patients</article-title>
          <source>J Clin Epidemiol</source>
          <year>2008</year>
          <month>12</month>
          <volume>61</volume>
          <issue>12</issue>
          <fpage>1234</fpage>
          <lpage>1240</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2008.01.006</pub-id>
          <pub-id pub-id-type="medline">18619805</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(08)00030-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Posner</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Parekh</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Koh</surname>
              <given-names>HK</given-names>
            </name>
          </person-group>
          <article-title>Defining and measuring chronic conditions: imperatives for research, policy, program, and practice</article-title>
          <source>Prev Chronic Dis</source>
          <year>2013</year>
          <month>04</month>
          <day>25</day>
          <volume>10</volume>
          <fpage>E66</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/pcd/issues/2013/12_0239.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.5888/pcd10.120239</pub-id>
          <pub-id pub-id-type="medline">23618546</pub-id>
          <pub-id pub-id-type="pii">E66</pub-id>
          <pub-id pub-id-type="pmcid">PMC3652713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <source>Dominion Diagnostics</source>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.dominiondiagnostics.com/sites/default/files/ICD-10-CM%20Addiction%20Med%20%26%20Behavioral%20Health%202017.pdf">https://www.dominiondiagnostics.com/sites/default/files/ICD-10-CM%20Addiction%20Med%20%26%20Behavioral%20Health%202017.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <article-title>Rural-Urban Commuting Area Codes (2021)</article-title>
          <source>U.S. Department of Agriculture – Economic Research Service</source>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ers.usda.gov/data-products/rural-urban-commuting-area-codes/">https://www.ers.usda.gov/data-products/rural-urban-commuting-area-codes/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Artiga</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Beyond health care: the role of social determinants in promoting health and health equity 2018</article-title>
          <source>Henry J Kaiser Family Foundation</source>
          <year>2018</year>
          <month>05</month>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://files.kff.org/attachment/issue-brief-beyond-health-care">http://files.kff.org/attachment/issue-brief-beyond-health-care</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <article-title>WHO Director-General's opening remarks at the media briefing on COVID-19</article-title>
          <source>World Health Organization</source>
          <year>2020</year>
          <month>03</month>
          <day>11</day>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/director-general/speeches/detail/who-director-general-s-opening-remarks-at-the-media-briefing-on-covid-19---11-march-2020">https://www.who.int/director-general/speeches/detail/who-director-general-s-opening-remarks-at-the-media-briefing-on-covid-19---11-march-2020</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2011</year>
          <month>10</month>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>30</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
            <collab>editors</collab>
          </person-group>
          <article-title>XGBoost: a scalable tree boosting system</article-title>
          <year>2016</year>
          <conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>785</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Stochastic gradient boosted distributed decision trees</article-title>
          <year>2009</year>
          <conf-name>Proceedings of the 18th ACM Conference on Information and Knowledge Management</conf-name>
          <conf-date>November 2009</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>2061</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1145/1645953.1646301</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kasthurirathne</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Grannis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Halverson</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Morea</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Menachemi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Vest</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Precision health-enabled machine learning to identify need for wraparound social services using patient- and population-level data sets: algorithm development and validation</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>07</month>
          <day>09</day>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>e16129</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/7/e16129/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16129</pub-id>
          <pub-id pub-id-type="medline">32479414</pub-id>
          <pub-id pub-id-type="pii">v8i7e16129</pub-id>
          <pub-id pub-id-type="pmcid">PMC7380999</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Didrik</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Tree Boosting With XGBoost - Why Does XGBoost Win "Every" Machine Learning Competition?</article-title>
          <source>Norwegian University of Science and Technology</source>
          <year>2016</year>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ntnuopen.ntnu.no/ntnu-xmlui/handle/11250/2433761">https://ntnuopen.ntnu.no/ntnu-xmlui/handle/11250/2433761</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <article-title>Python API Reference</article-title>
          <source>XGBoost</source>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://xgboost.readthedocs.io/en/latest/python/python_api.html?highlight=get_score#xgboost.Booster.get_score">https://xgboost.readthedocs.io/en/latest/python/python_api.html?highlight=get_score#xgboost.Booster.get_score</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferryman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pitcan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Fairness in precision medicine</article-title>
          <source>Data &#38; Society</source>
          <year>2018</year>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://datasociety.net/library/fairness-in-precision-medicine/">https://datasociety.net/library/fairness-in-precision-medicine/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lithander</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Neumann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tenison</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lloyd</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Welsh</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rodrigues</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Higgins</surname>
              <given-names>Julian P T</given-names>
            </name>
            <name name-style="western">
              <surname>Scourfield</surname>
              <given-names>Lily</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>Hannah</given-names>
            </name>
            <name name-style="western">
              <surname>Haunton</surname>
              <given-names>Victoria J</given-names>
            </name>
            <name name-style="western">
              <surname>Henderson</surname>
              <given-names>Emily J</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 in older people: a rapid clinical review</article-title>
          <source>Age Ageing</source>
          <year>2020</year>
          <month>07</month>
          <day>01</day>
          <volume>49</volume>
          <issue>4</issue>
          <fpage>501</fpage>
          <lpage>515</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32377677"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ageing/afaa093</pub-id>
          <pub-id pub-id-type="medline">32377677</pub-id>
          <pub-id pub-id-type="pii">5831205</pub-id>
          <pub-id pub-id-type="pmcid">PMC7239238</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lian</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The impact of COPD and smoking history on the severity of COVID-19: A systemic review and meta-analysis</article-title>
          <source>J Med Virol</source>
          <year>2020</year>
          <month>10</month>
          <day>17</day>
          <volume>92</volume>
          <issue>10</issue>
          <fpage>1915</fpage>
          <lpage>1921</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32293753"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/jmv.25889</pub-id>
          <pub-id pub-id-type="medline">32293753</pub-id>
          <pub-id pub-id-type="pmcid">PMC7262275</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>Renjie</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Haijun</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Yin</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>Keye</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Lei</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>Heng</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Shanshan</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Desheng</given-names>
            </name>
          </person-group>
          <article-title>Diabetes is a risk factor for the progression and prognosis of COVID-19</article-title>
          <source>Diabetes Metab Res Rev</source>
          <year>2020</year>
          <month>03</month>
          <day>31</day>
          <fpage>e3319</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32233013"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/dmrr.3319</pub-id>
          <pub-id pub-id-type="medline">32233013</pub-id>
          <pub-id pub-id-type="pmcid">PMC7228407</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zuin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guasti</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Roncon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cervellati</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zuliani</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Dementia and the risk of death in elderly patients with COVID‐19 infection: Systematic review and meta‐analysis</article-title>
          <source>Int J Geriatr Psychiatry</source>
          <year>2021</year>
          <month>05</month>
          <volume>36</volume>
          <issue>5</issue>
          <fpage>697</fpage>
          <lpage>703</lpage>
          <pub-id pub-id-type="doi">10.1002/gps.5468</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Souch</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Cossman</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>A commentary on rural-urban disparities in COVID-19 testing rates per 100,000 and risk factors</article-title>
          <source>J Rural Health</source>
          <year>2021</year>
          <month>01</month>
          <volume>37</volume>
          <issue>1</issue>
          <fpage>188</fpage>
          <lpage>190</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32282964"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jrh.12450</pub-id>
          <pub-id pub-id-type="medline">32282964</pub-id>
          <pub-id pub-id-type="pmcid">PMC7262182</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Rural-urban disparities in knowledge, behaviors, and mental health during COVID-19 pandemic: A community-based cross-sectional survey</article-title>
          <source>Medicine</source>
          <year>2021</year>
          <volume>100</volume>
          <issue>13</issue>
          <fpage>e25207</fpage>
          <pub-id pub-id-type="doi">10.1097/md.0000000000025207</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nabavi</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Long covid: How to define it and how to manage it</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <month>09</month>
          <day>07</day>
          <volume>370</volume>
          <fpage>m3489</fpage>
          <pub-id pub-id-type="doi">10.1136/bmj.m3489</pub-id>
          <pub-id pub-id-type="medline">32895219</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <article-title>National COVID Cohort Collaborative (N3C)</article-title>
          <source>National Institutes of Health</source>
          <access-date>2021-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ncats.nih.gov/n3c">https://ncats.nih.gov/n3c</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
