<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e52794</article-id>
      <article-id pub-id-type="pmid">39499554</article-id>
      <article-id pub-id-type="doi">10.2196/52794</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Machine Learning–Based Prediction for Incident Hypertension Based on Regular Health Checkup Data: Derivation and Validation in 2 Independent Nationwide Cohorts in South Korea and Japan</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Li</surname>
            <given-names>Yike</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Higaki</surname>
            <given-names>Akinori</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jeong</surname>
            <given-names>Changwon</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Hwang</surname>
            <given-names>Seung Ha</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-0066-3047</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Hayeon</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-2403-6241</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Jun Hyuk</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-0358-4760</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Myeongcheol</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-7185-9471</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Koyanagi</surname>
            <given-names>Ai</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9565-5004</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>Lee</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5340-9833</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Rhee</surname>
            <given-names>Sang Youl</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0119-5818</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Yon</surname>
            <given-names>Dong Keon</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff8" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1628-9948</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Jinseok</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Engineering</institution>
            <institution>Kyung Hee University</institution>
            <addr-line>1732 Deogyeong-daero</addr-line>
            <addr-line>Yongin, 17104</addr-line>
            <country>Republic of Korea</country>
            <fax>82 312012571</fax>
            <phone>82 312012570</phone>
            <email>gonasago@khu.ac.kr</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8580-490X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Engineering</institution>
        <institution>Kyung Hee University</institution>
        <addr-line>Yongin</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Center for Digital Health, Medical Science Research Institute</institution>
        <institution>Kyung Hee University College of Medicine</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Health and Human Science</institution>
        <institution>University of Southern California</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Regulatory Science</institution>
        <institution>Kyung Hee University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Research and Development Unit</institution>
        <institution>Parc Sanitari Sant Joan de Deu</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Centre for Health, Performance and Wellbeing</institution>
        <institution>Anglia Ruskin University</institution>
        <addr-line>Cambridge</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Department of Endocrinology and Metabolism</institution>
        <institution>Kyung Hee University School of Medicine</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Department of Pediatrics, Kyung Hee University Medical Center</institution>
        <institution>Kyung Hee University College of Medicine</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jinseok Lee <email>gonasago@khu.ac.kr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>5</day>
        <month>11</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e52794</elocation-id>
      <history>
        <date date-type="received">
          <day>15</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>9</day>
          <month>2</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>4</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>17</day>
          <month>9</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Seung Ha Hwang, Hayeon Lee, Jun Hyuk Lee, Myeongcheol Lee, Ai Koyanagi, Lee Smith, Sang Youl Rhee, Dong Keon Yon, Jinseok Lee. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 05.11.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e52794" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Worldwide, cardiovascular diseases are the primary cause of death, with hypertension as a key contributor. In 2019, cardiovascular diseases led to 17.9 million deaths, predicted to reach 23 million by 2030.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study presents a new method to predict hypertension from demographic data, using 6 machine learning models for enhanced reliability and applicability. The goal is to harness artificial intelligence for early and accurate hypertension diagnosis across diverse populations.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used data from 2 national cohort studies. Data from the National Health Insurance Service-National Sample Cohort (South Korea, n=244,814), conducted between 2002 and 2013, were used to train and test machine learning models designed to anticipate incident hypertension within 5 years of a health checkup involving those aged ≥20 years, and data from the Japanese Medical Data Center cohort (Japan, n=1,296,649) were used for extra validation. An ensemble from 6 diverse machine learning models was used to identify the 5 most salient features contributing to hypertension by presenting a feature importance analysis to confirm the contribution of each feature.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The Adaptive Boosting and logistic regression ensemble showed superior balanced accuracy (0.812, sensitivity 0.806, specificity 0.818, and area under the receiver operating characteristic curve 0.901). The 5 key hypertension indicators were age, diastolic blood pressure, BMI, systolic blood pressure, and fasting blood glucose. The Japanese Medical Data Center cohort dataset (extra validation set) corroborated these findings (balanced accuracy 0.741 and area under the receiver operating characteristic curve 0.824). The ensemble model was integrated into a public web portal for predicting hypertension onset based on health checkup data.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Comparative evaluation of our machine learning models against classical statistical models across 2 distinct studies emphasized the former’s enhanced stability, generalizability, and reproducibility in predicting hypertension onset.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>machine learning</kwd>
        <kwd>hypertension</kwd>
        <kwd>cardiovascular disease</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>cause of death</kwd>
        <kwd>cardiovascular risk</kwd>
        <kwd>predictive analytics</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The World Health Organization (WHO) has identified cardiovascular diseases (CVDs) as the leading cause of mortality worldwide, with a staggering 17.9 million deaths recorded in 2019 [<xref ref-type="bibr" rid="ref1">1</xref>]. This number is projected to rise to approximately 23 million by 2030. Of the multitude of CVDs, specific conditions such as myocardial infarction and ischemic stroke account for more than 85% of these CVD-related deaths [<xref ref-type="bibr" rid="ref2">2</xref>]. The US Centers for Disease Control and Prevention (CDC) have highlighted that CVDs caused over US $216 billion in overall health care expenses and resulted in US $147 billion in losses due to increased workplace absenteeism and the corresponding loss of productivity in the United States. As a result, CVDs impose a significant burden on the nation’s economy [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      <p>Given the acknowledged biological and economic risks associated with CVDs, it is widely recognized that hypertension plays a significant role in these health complications, including myocardial infarction and stroke [<xref ref-type="bibr" rid="ref4">4</xref>]. Predicting hypertension onset is notably challenging due to the disease’s multifactorial origins, encompassing a wide range of genetic, environmental, and lifestyle factors. The subtle and often interrelated effects of these factors contribute to the complexity of early detection. For example, genetic predispositions may interact with lifestyle choices such as diet, exercise, and smoking habits, in ways that are not fully understood [<xref ref-type="bibr" rid="ref5">5</xref>]. Environmental influences, including socioeconomic status and access to health care, further complicate the picture by affecting both the risk of developing hypertension and the ability to manage risk factors effectively [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Additionally, the asymptomatic nature of hypertension in its early stages means that it often goes unnoticed until more serious health issues arise, making timely and accurate prediction all the more difficult [<xref ref-type="bibr" rid="ref7">7</xref>]. These challenges underscore the need for sophisticated predictive models that can integrate and analyze the myriad of contributing factors to identify individuals at risk of developing hypertension early in its progression. Considering the severe societal implications of hypertension across all nations, early diagnosis is crucial to mitigate its potential hazards. In this study, we propose a novel approach to predict the onset of hypertension using the population’s regular health checkup and demographic factors. 
In recent years, machine learning models have emerged as powerful tools across many fields, particularly in medical applications [<xref ref-type="bibr" rid="ref8">8</xref>]. Their ability to analyze complex patterns and make accurate predictions has revolutionized how we approach health care challenges.</p>
      <p>However, ensuring this methodology’s replicability and broad applicability in real-world settings presents an intricate challenge. To bolster the reliability of our hypertension projections, we conducted additional independent validation using distinct cohorts. This study investigated various machine learning approaches to strengthen the method’s robustness, replicability, and real-world practicality. We delved into the hypertension landscape across Asian populations through machine learning optics, firmly anchoring our methodology within the burgeoning realm of artificial intelligence (AI)–driven disciplines. This research endeavors to amplify our comprehension of global hypertension trends by channeling multifaceted machine learning analyses, thereby catalyzing more timely and precise diagnostic efforts.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source</title>
        <p>We used 2 national, large-scale, and general population–based cohort studies: the National Health Insurance Service-National Sample Cohort (NHIS-NSC; N=973,303) and the Japanese Medical Data Center cohort (JMDC; N=12,143,715). This study was approved by the institutional review board of National Health Insurance Service, Kyung Hee University (KHSIRB-23-085[EA]), and the JMDC (PHP-00002201-04). The requirement for informed consent was waived as this study used deidentified administrative data.</p>
      </sec>
      <sec>
        <title>NHIS-NSC (Discovery Cohort)</title>
        <p>Data from the NHIS-NSC [<xref ref-type="bibr" rid="ref9">9</xref>], a population-based, nationwide, large-scale cohort of South Korea, were obtained from individuals aged ≥20 years who received general health checkups between January 1, 2002, and December 31, 2013. We used the NHIS-NSC to train, validate, and test the machine learning model to predict the presence or absence of hypertension within 5 years of a regular (yearly) health checkup. Hypertension was defined as having received diagnoses with I10, I11, I12, I13, or I15 codes from the <italic>ICD-10</italic> (<italic>International Classification of Diseases, 10th revision</italic>) ≥2 times and using antihypertensives [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
        <p>During the data preprocessing phase, we transformed the cohort into a machine learning dataset by representing each eligible individual once, with all features recorded from their initial health checkup. The ground truth was determined by the occurrence of a hypertension event within the subsequent 5 years. We excluded participants with baseline hypertension or those lost to follow-up from this study. Individuals who developed hypertension after 5 years were classified as nonhypertensive for this study.</p>
        <p>In this study, among the 973,303 registered participants, we excluded those who fulfilled any of the following criteria: (1) those who had reported “yes” for hypertension in the questionnaire; (2) those who had a prior diagnosis of hypertension with I10, I11, I12, I13, or I15 codes of <italic>ICD-10</italic> before the health checkup; (3) those with missing data for information and questionnaire; (4) those who had died before the year 2013; and (5) those who had blood pressure meeting the criteria for hypertension (systolic blood pressure ≥140 mm Hg or diastolic blood pressure ≥90 mm Hg). A graphical representation of the subject exclusion process of the NHIS-NSC is illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref> [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study population and data selection process in the NHIS-NSC (Korea) and JMDC (Japan). NHIS-NSC: National Health Insurance Service-National Sample Cohort; JMDC: Japanese Medical Data Center cohort.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e52794_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>JMDC (Validation Cohort)</title>
        <p>The JMDC dataset is the medical examination data from multiple health insurance associations in Japan since 2005 [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Given the machine learning model trained from the NHIS-NSC, we used the JMDC data for extra validation. We also applied the same exclusion criteria used in the NHIS-NSC to the JMDC data, resulting in the use of data from only 1,296,649 of the 12,143,715 participants available. A graphical representation of the subject exclusion process of the JMDC dataset is also illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
      </sec>
      <sec>
        <title>Study Design</title>
        <p>To develop the machine learning model for predicting the presence or absence of hypertension within 5 years of a regular (yearly) health checkup, we used the following 18 available variables as the model’s input: age, sex, region of residence, household income, systolic blood pressure, diastolic blood pressure, fasting blood glucose, serum total cholesterol, hemoglobin, aspartate transaminase (AST), alanine transaminase (ALT), γ-glutamyl transpeptidase (γ-GTP), BMI, history of diabetes mellitus, history of stroke (including ischemic stroke, hemorrhagic stroke, and/or transient ischemic attack), smoking status, alcohol intake, and physical activity [<xref ref-type="bibr" rid="ref15">15</xref>]. The variables used in our machine learning model are summarized in <xref ref-type="table" rid="table1">Table 1</xref>. More specifically, the region of residence was categorized into rural and urban. Household income was categorized into 11 scales (0 to 10) based on basic livelihood recipient and decile (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>); in 10 income deciles, the 5th decile is the reference median income. Compared to 5th decile (100%), 1st decile (the lowest income level) has an income of less than 30%, 2nd decile has an income of less than 50%, 3rd decile has an income of less than 70%, 4th decile has an income of less than 90%, 6th decile has an income of less than 130%, 7th decile has an income of less than 150%, 8th decile has an income of less than 200%, 9th decile has an income of less than 300%, and 10th decile (the highest income level) has an income of 300% or more. Basic livelihood recipients are individuals whose income falls within 1st decile (the lowest 30%) [<xref ref-type="bibr" rid="ref12">12</xref>]. Smoking status was categorized into never, former, and current smokers. 
Alcohol intake was categorized into rare (less than one time per week), 1-2, 3-4, and more than four times per week [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Physical activity was categorized into never, 1-2, 3-4, and 5-6 times per week, and every day. The statistical characteristics of the variables for the NHIS-NSC and JMDC are summarized in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>, respectively.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Baseline characteristics of subjects in the discovery cohort (National Health Insurance Service-National Sample Cohort, N=244,814).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="710"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Variables</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Sex, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Male</td>
                <td>117,642 (48.05)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Female</td>
                <td>127,172 (51.95)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Age (years), mean (SD)</td>
                <td>47.03 (13.29)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Region of residence, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Urban</td>
                <td>111,640 (45.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Rural</td>
                <td>133,174 (54.4)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Household income, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Basic livelihood recipient</td>
                <td>437 (0.18)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">
                  <bold>Income deciles (excluding basic livelihood recipients)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D1 (the lowest income level; ≤30th percentile)</td>
                <td>18,783 (7.67)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D2 (31st-50th percentile)</td>
                <td>19,399 (7.92)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D3 (51st-70th percentile)</td>
                <td>22,165 (9.05)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D4 (71st-90th percentile)</td>
                <td>24,498 (10.01)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D5 (91st-100th percentile)</td>
                <td>25,043 (10.23)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D6 (101st-130th percentile)</td>
                <td>26,137 (10.68)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D7 (131st-150th percentile)</td>
                <td>26,195 (10.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D8 (151st-200th percentile)</td>
                <td>26,562 (10.85)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D9 (201st-300th percentile)</td>
                <td>28,469 (11.63)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>D10 (the highest income level; &gt;300th percentile)</td>
                <td>27,126 (11.08)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Systolic blood pressure (mm Hg), mean (SD)</td>
                <td>111.6 (9.42)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Diastolic blood pressure (mm Hg), mean (SD)</td>
                <td>68.69 (6.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Fasting blood glucose (mg/dL), mean (SD)</td>
                <td>92 (23.48)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Serum total cholesterol (mg/dL), mean (SD)</td>
                <td>187.91 (35.84)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Hemoglobin (g/dL), mean (SD)</td>
                <td>13.78 (1.58)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Aspartate transaminase (U/L), mean (SD)</td>
                <td>23.82 (15.12)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Alanine transaminase (U/L), mean (SD)</td>
                <td>22.9 (21.32)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">γ-glutamyl transpeptidase (U/L), mean (SD)</td>
                <td>28.81 (37.6)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">BMI (kg/m<sup>2</sup>), mean (SD)</td>
                <td>22.8 (3)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">History of diabetes mellitus, n (%)</td>
                <td>4596 (1.88)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">History of stroke, n (%)</td>
                <td>376 (0.15)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Smoking status, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nonsmoker</td>
                <td>176,333 (72.03)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Ex-smoker</td>
                <td>9148 (3.74)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Current smoker</td>
                <td>59,333 (24.24)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Alcohol intake per week, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Rarely</td>
                <td>182,101 (74.38)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">1-2</td>
                <td>45,374 (18.53)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">3-4</td>
                <td>12,337 (5.04)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">≥5</td>
                <td>5002 (2.04)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Physical activity per week, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Never</td>
                <td>141,847 (57.94)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">1-2</td>
                <td>63,046 (25.75)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">3-4</td>
                <td>23,426 (9.57)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">5-6</td>
                <td>5828 (2.38)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Every day</td>
                <td>10,667 (4.36)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Baseline characteristics of subjects in the validation cohort (Japanese Medical Data Center cohort; N=1,296,649).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="650"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Variables</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Sex, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>754,055 (58.15)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>542,594 (41.85)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Age (years), mean (SD)</td>
                <td>42.51 (10.24)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Systolic blood pressure (mm Hg), mean (SD)</td>
                <td>111.65 (10.43)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Diastolic blood pressure (mm Hg), mean (SD)</td>
                <td>67.88 (7.55)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Fasting blood glucose (mg/dL), mean (SD)</td>
                <td>91.45 (14.31)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Serum total cholesterol (mg/dL), mean (SD)</td>
                <td>200.05 (35.25)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Hemoglobin (g/dL), mean (SD)</td>
                <td>14.19 (1.56)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Aspartate transaminase (U/L), mean (SD)</td>
                <td>20.84 (8.89)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Alanine transaminase (U/L), mean (SD)</td>
                <td>21.02 (15.86)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">γ-glutamyl transpeptidase (U/L), mean (SD)</td>
                <td>30.63 (33.48)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">BMI (kg/m<sup>2</sup>), mean (SD)</td>
                <td>22.12 (3.22)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">History of diabetes mellitus, n (%)</td>
                <td>14,345 (1.11)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">History of stroke, n (%)</td>
                <td>3616 (0.28)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Smoking, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No</td>
                <td>978,245 (75.44)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Yes</td>
                <td>318,404 (24.56)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Alcohol intake per week, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rarely</td>
                <td>669,090 (51.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sometimes</td>
                <td>403,527 (31.12)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Every day</td>
                <td>224,052 (17.28)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Physical activity, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No</td>
                <td>1,082,572 (83.49)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Yes</td>
                <td>214,077 (16.51)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Proposed Machine Learning Models</title>
        <p>In this study, we split the NHIS-NSC dataset (n=244,814) into train (n=195,851) and internal test (n=48,963) data with a ratio of 8:2 in a stratified fashion. The internal test set was used only for an independent test of our developed AI model and not for training or internal validation. The JMDC (n=1,296,649) was used as the external validation dataset in this study.</p>
        <p>The data distribution was severely imbalanced: the ratio of the hypertension group to the nonhypertension group was 1:15.32. To minimize the bias of the prediction model toward the majority group (nonhypertension), we up-sampled the hypertension data using a synthetic minority oversampling technique during the model update [<xref ref-type="bibr" rid="ref16">16</xref>]. In addition, in the preprocessing stage, we performed standard scaler normalization for all features: we calculated the mean and SD of each feature from the training dataset and then normalized all feature values from both the test and external validation datasets to have a mean of 0 and an SD of 1.</p>
        <p>To predict hypertension occurrence within 5 years based on regular health checkups, we applied 6 machine learning models using 18 features: Extreme Gradient Boosting, random forest, gradient boosting machine (GBM), Light GBM, Adaptive Boosting (AdaBoost), and logistic regression (LR) [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Subsequently, we chose the best 3 among the 6 models and applied an ensemble approach by considering all possible combinations [<xref ref-type="bibr" rid="ref19">19</xref>]. Performance evaluations were based on 5-fold cross-validation on the training data using the following metrics: sensitivity, specificity, accuracy, balanced accuracy, and area under the receiver operating characteristic curve (AUROC) [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. To compare the predictive performance of the models, we performed a Cochran Q test on the model performance [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Due to the significant data imbalance, we used balanced accuracy as the primary model evaluation metric. Moreover, we also estimated additional metrics to comprehensively evaluate the performance of each model: precision, <italic>F</italic><sub>1</sub>-score, and area under the precision-recall curve. To address the issue of inappropriate precision and <italic>F</italic><sub>1</sub>-score under the severe data imbalance, we measured weighted average precision and weighted average <italic>F</italic><sub>1</sub>-score, accounting for the differences in class sizes. Finally, we presented its feature importance analysis, listing features in the order they contributed to hypertension prediction within 5 years of regular health checkups.</p>
        <p>We implemented the models using Python (version 3.9.16; Python Software Foundation) with TensorFlow (version 2.9.1; Google LLC), Keras (version 2.9.0; Google LLC), NumPy (version 1.21.5; NumFOCUS, Inc), Pandas (version 1.4.4; NumFOCUS, Inc), Matplotlib (version 3.5.2; NumFOCUS, Inc), and Scikit-learn (version 1.0.2; NumFOCUS, Inc) [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. All statistical analysis was performed using SAS (version 9.4, SAS Institute Inc) [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      </sec>
      <sec>
        <title>Feature Importance</title>
        <p>To analyze the effect of each feature on predicting hypertension occurrence, we performed the feature importance analysis to confirm the contribution of each feature. For tree-based models, the mean decrease in impurity (MDI), which is also known as Gini importance, is used to assess feature importance [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. The following equation represents MDI [<xref ref-type="bibr" rid="ref24">24</xref>]:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e52794_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e52794_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>T</italic> is the total number of trees in the base estimator, <italic>M</italic><sub><italic>t</italic></sub> is the set of all nodes in tree <italic>t</italic>, <italic>j</italic><sub><italic>m</italic></sub> is the feature used for splitting the node <italic>m</italic>, Δ<italic>Gini</italic>(<italic>m</italic>) is the decrease in impurity at node <italic>m</italic>, <italic>Gini</italic>(<italic>m</italic>) is the Gini impurity, <italic>n</italic><sub><italic>m</italic></sub> is the number of samples at node <italic>m</italic>, and 1(<italic>j</italic><sub><italic>m</italic></sub>=<italic>j</italic>) is an indicator function that is 1 if feature <italic>j</italic> is used for splitting at node <italic>m</italic> and 0 otherwise. The notations <italic>l</italic> and <italic>r</italic> indicate the left child node and the right child node, respectively. These equations calculate feature importance by summing up the impurity reductions caused by each feature across all trees in the model. A higher MDI indicates greater feature importance. For LR, we used the regression coefficient to calculate feature importance. The following equation indicates the LR model [<xref ref-type="bibr" rid="ref25">25</xref>]:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e52794_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e52794_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>d</italic> is the number of features. The regression coefficient describes the average change in the dependent variable for each 1-unit change in the independent variable for continuous independent variables or the expected difference versus a reference category for categorical independent variables. Further, for ensembled models, we calculated feature importance by averaging the standardized feature importance from each model used for the ensemble.</p>
      </sec>
      <sec>
        <title>Risk Factors</title>
        <p>We further investigated the association between the occurrence of hypertension and independent variables using univariable and multivariable LR analyses in the discovery and validation cohorts [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Predictor variables included categorical variables (sex, region of residence, history of diseases, smoking status, alcohol intake, and physical activity) and continuous variables transformed into categorical form (age, household income, BMI, blood pressure, fasting blood glucose, serum total cholesterol, hemoglobin, ALT, AST, and γ-GTP). Univariable and multivariable LR analyses were conducted for each variable to estimate the odds ratio and 95% CI for the occurrence of hypertension. All statistical analyses were conducted using SAS (version 9.4, SAS Institute) [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The claims-based cohort data in South Korea and Japan were anonymous, and this study’s protocol was approved by the Institutional Review Board of National Health Insurance Service, Kyung Hee University (KHSIRB-23-085(EA)), and the JMDC (PHP-00002201-04).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>K-Fold Cross-Validation</title>
        <p>For the 6 machine learning models, we found the following optimized hyperparameters using grid search with 5-fold cross-validation: For Extreme Gradient Boosting, we used booster type of gradient boosted tree, column subsample by tree 0.1, learning rate 0.2, maximum depth of 3, and number of estimators 100. For random forest, we used maximum depth of 3, maximum features of 3, minimum samples per leaf 3, minimum samples per split 3, number of estimators 50, and balanced class weight. For Light GBM, we used boosting parameter of gradient-based 1-sided sampling, objective function of binary classification objective, evaluation metrics of log loss function for binary classification, learning rate 0.002, number of estimators 70, and number of leaves 30. For GBM, we used learning rate 0.008, maximum depth of 2, minimum samples per leaf 3, minimum samples per split 3, and number of estimators 100. For AdaBoost, we used the algorithm of stagewise additive modeling using a multiclass exponential loss function, real variant; number of tree estimators 500; and learning rate 0.02. For LR, we used the solver of the library for large linear classification, L2 penalty norm, inverse of regularization strength 0.1, and maximum number of iterations 100. For ensemble models, we used the same hyperparameters as those used in the individual machine learning models. Additionally, no weights were applied when combining the models in the ensemble. After finding the optimal hyperparameters, we checked the performance of each model and ensemble model. To improve performance, we tuned the models to use optimal thresholds through AUROC [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. The optimized threshold values for some models are as follows: 0.48 for AdaBoost, 0.46 for GBM and AdaBoost, 0.46 for LR and GBM, and 0.46 for GBM, AdaBoost, and LR.
<xref ref-type="table" rid="table3">Table 3</xref> summarizes the 5-fold cross-validation accuracy comparison of each model and ensemble machine learning models using sensitivity, specificity, accuracy, balanced accuracy, and AUROC as evaluation metrics. Among the single models, GBM, LR, and AdaBoost demonstrated the best prediction performance per balanced accuracy and AUROC. To further improve the classification performance, we explored an ensemble approach using the top-3 single models: GBM, LR, and AdaBoost. The results show that the combination of LR and AdaBoost provides the highest performance with a sensitivity of 80.62%, specificity of 81.79%, balanced accuracy of 81.2%, and AUROC of 0.9012. In addition, we also summarize 3 additional metrics suitable for imbalanced data in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>: weighted average precision, weighted average <italic>F</italic><sub>1</sub>-score, and area under the precision-recall curve.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of the prediction performances of the prediction models on the training dataset in the discovery cohort<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="130"/>
            <col width="140"/>
            <col width="140"/>
            <col width="150"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Sensitivity, mean (SD)</td>
                <td>Specificity, mean (SD)</td>
                <td>Accuracy, mean (SD)</td>
                <td>Balanced accuracy, mean (SD)</td>
                <td>AUROC<sup>b</sup>, mean (SD)</td>
                <td><italic>P</italic> values<sup>c</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>AdaBoost<sup>d</sup></td>
                <td>0.8503 (0.0074)</td>
                <td>0.7725 (0.0048)</td>
                <td>0.7764 (0.0044)</td>
                <td>0.8114 (0.0023)</td>
                <td>0.9136 (0.0035)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>LR<sup>e</sup></td>
                <td>0.8009 (0.0090)</td>
                <td>0.8076 (0.0015)</td>
                <td>0.8072 (0.0012)</td>
                <td>0.8042 (0.0041)</td>
                <td>0.8819 (0.0046)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>XGBoost<sup>f</sup></td>
                <td>0.6208 (0.011)</td>
                <td>0.9599 (0.0029)</td>
                <td>0.943 (0.0029)</td>
                <td>0.7904 (0.006)</td>
                <td>0.8866 (0.0052)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Random forest</td>
                <td>0.7328 (0.012)</td>
                <td>0.8642 (0.0098)</td>
                <td>0.8577 (0.009)</td>
                <td>0.7985 (0.0052)</td>
                <td>0.8875 (0.0056)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Light GBM<sup>g</sup></td>
                <td>0.8295 (0.0052)</td>
                <td>0.7649 (0.0033)</td>
                <td>0.7681 (0.0032)</td>
                <td>0.7972 (0.0038)</td>
                <td>0.8743 (0.0058)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>GBM</td>
                <td>0.7853 (0.0065)</td>
                <td>0.8194 (0.0027)</td>
                <td>0.8176 (0.0029)</td>
                <td>0.8023 (0.0046)</td>
                <td>0.8942 (0.0051)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>GBM and AdaBoost</td>
                <td>0.8194 (0.0063)</td>
                <td>0.8006 (0.0065)</td>
                <td>0.8016 (0.0061)</td>
                <td>0.81 (0.0035)</td>
                <td>0.9063 (0.0044)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>LR and GBM</td>
                <td>0.8221 (0.0081)</td>
                <td>0.7934 (0.0012)</td>
                <td>0.7949 (0.0012)</td>
                <td>0.8078 (0.0041)</td>
                <td>0.9009 (0.0048)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>GBM, AdaBoost, and LR</td>
                <td>0.8373 (0.0076)</td>
                <td>0.7795 (0.0014)</td>
                <td>0.7824 (0.0011)</td>
                <td>0.8084 (0.0034)</td>
                <td>0.9065 (0.0047)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>AdaBoost and LR</td>
                <td>0.8062 (0.0072)<sup>h</sup></td>
                <td>0.8179 (0.0015)<sup>h</sup></td>
                <td>0.8173 (0.0012)<sup>h</sup></td>
                <td>0.8120 (0.0030)<sup>h</sup></td>
                <td>0.9012 (0.0046)<sup>h</sup></td>
                <td>Reference</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>All outcomes are averaged over 5-fold cross-validation.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>To compare the predictive performance of the models, we performed a Cochran Q test on the model performance.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>AdaBoost: Adaptive Boosting.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>XGBoost: Extreme Gradient Boosting.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>GBM: gradient boosting machine.</p>
            </fn>
            <fn id="table3fn8">
              <p><sup>h</sup>Indicates machine learning model with best performance of prediction.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Feature Importance Analysis</title>
        <p>The ranked normalized feature importance is from the ensemble model combining AdaBoost and LR. According to the results, age had the highest importance value among the features, followed by diastolic blood pressure, BMI, systolic blood pressure, and fasting blood glucose. Feature importances are as follows: age, 1.00; diastolic blood pressure, 0.93; BMI, 0.75; systolic blood pressure, 0.58; fasting blood glucose, 0.35; γ-GTP, 0.24; serum total cholesterol, 0.18; ALT, 0.10; AST, 0.097; history of diabetes mellitus, 0.087; household income, 0.077; hemoglobin, 0.025; sex, 0.021; history of stroke, 0.014; physical activity, 0.010; alcohol intake per week, 0.0077; region of residence, 0.0065; and smoking, 0.0055.</p>
      </sec>
      <sec>
        <title>Ablation Study</title>
        <p>Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> summarizes the ablation study results when one or more of the top-5 contributing features were excluded: age, diastolic blood pressure, BMI, systolic blood pressure, and fasting blood glucose. Without age, the model provides poor prediction performance: balanced accuracy dropped from 0.812 to 0.782, and AUROC dropped from 0.901 to 0.864. Without diastolic blood pressure, balanced accuracy dropped to 0.784, and AUROC dropped to 0.871. Without BMI, balanced accuracy dropped to 0.811, and AUROC dropped to 0.898. Without systolic blood pressure, balanced accuracy dropped to 0.801, and AUROC dropped to 0.890. Without fasting blood glucose, balanced accuracy dropped to 0.8118, and AUROC dropped to 0.9010. Additionally, we analyzed the model performance when the 2 blood pressure features (systolic and diastolic) were excluded. The results show that the performance significantly degraded across all accuracy metrics: balanced accuracy from 0.812 to 0.725 and AUROC from 0.901 to 0.797.</p>
      </sec>
      <sec>
        <title>Test Data Results and External Validation Results</title>
        <p><xref ref-type="table" rid="table4">Table 4</xref> summarizes the test data results from the test dataset from the NHIS-NSC and the external validation data results from the JMDC. The test data results also showed that the ensemble model combining AdaBoost and LR provides the highest value of balanced accuracy (0.8147). The similarity between the cross-validation and test data results denotes minimal overfitting or underfitting. The external validation data results also showed the ensemble model combining AdaBoost and LR provided the highest value of balanced accuracy (0.7406). The results confirmed that our ensemble model combining AdaBoost and LR could provide an accurate prediction of hypertension within 5 years based on the regular health checkup data.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Comparison of the prediction performances of the prediction models on the test dataset (discovery cohort) and the external validation dataset (validation cohort).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="320"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="160"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Model</td>
                <td>Sensitivity</td>
                <td>Specificity</td>
                <td>Accuracy</td>
                <td>Balanced accuracy</td>
                <td>AUROC<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="7">
                  <bold>Test dataset (discovery cohort)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AdaBoost<sup>b</sup></td>
                <td>0.8573</td>
                <td>0.7677</td>
                <td>0.7722</td>
                <td>0.8125</td>
                <td>0.9123</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR<sup>c</sup></td>
                <td>0.8093</td>
                <td>0.8064</td>
                <td>0.8066</td>
                <td>0.8078</td>
                <td>0.8832</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost<sup>d</sup></td>
                <td>0.6257</td>
                <td>0.9629</td>
                <td>0.9461</td>
                <td>0.7943</td>
                <td>0.8886</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td>0.8913</td>
                <td>0.6417</td>
                <td>0.6542</td>
                <td>0.7665</td>
                <td>0.8598</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Light GBM<sup>e</sup></td>
                <td>0.8230</td>
                <td>0.7707</td>
                <td>0.7733</td>
                <td>0.7968</td>
                <td>0.8714</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GBM</td>
                <td>0.7839</td>
                <td>0.8379</td>
                <td>0.8352</td>
                <td>0.8109</td>
                <td>0.8966</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GBM and AdaBoost</td>
                <td>0.8403</td>
                <td>0.7884</td>
                <td>0.7910</td>
                <td>0.8143</td>
                <td>0.9080</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR and GBM</td>
                <td>0.8297</td>
                <td>0.7988</td>
                <td>0.8004</td>
                <td>0.8143</td>
                <td>0.9039</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GBM, AdaBoost, and LR</td>
                <td>0.8443</td>
                <td>0.7841</td>
                <td>0.7871</td>
                <td>0.8142</td>
                <td>0.9087</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AdaBoost and LR</td>
                <td>0.8129<sup>f</sup></td>
                <td>0.8165<sup>f</sup></td>
                <td>0.8163<sup>f</sup></td>
                <td>0.8147<sup>f</sup></td>
                <td>0.9022<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>External validation dataset (validation cohort)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AdaBoost</td>
                <td>0.6724</td>
                <td>0.7906</td>
                <td>0.7840</td>
                <td>0.7315</td>
                <td>0.8148</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR</td>
                <td>0.6378</td>
                <td>0.8352</td>
                <td>0.8242</td>
                <td>0.7365</td>
                <td>0.8134</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost</td>
                <td>0.5253</td>
                <td>0.8724</td>
                <td>0.8530</td>
                <td>0.6989</td>
                <td>0.7906</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td>0.7109</td>
                <td>0.6715</td>
                <td>0.6737</td>
                <td>0.6912</td>
                <td>0.7324</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Light GBM</td>
                <td>0.5446</td>
                <td>0.8241</td>
                <td>0.8084</td>
                <td>0.6843</td>
                <td>0.7402</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GBM</td>
                <td>0.4995</td>
                <td>0.8869</td>
                <td>0.8652</td>
                <td>0.6932</td>
                <td>0.7875</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GBM and AdaBoost</td>
                <td>0.5906</td>
                <td>0.8349</td>
                <td>0.8212</td>
                <td>0.7127</td>
                <td>0.8052</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR and GBM</td>
                <td>0.6428</td>
                <td>0.8379</td>
                <td>0.8270</td>
                <td>0.7404</td>
                <td>0.8241</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GBM, AdaBoost, and LR</td>
                <td>0.6240</td>
                <td>0.8546</td>
                <td>0.8417</td>
                <td>0.7393</td>
                <td>0.8271</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AdaBoost and LR</td>
                <td>0.6354<sup>f</sup></td>
                <td>0.8458<sup>f</sup></td>
                <td>0.8341<sup>f</sup></td>
                <td>0.7406<sup>f</sup></td>
                <td>0.8242<sup>f</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>AdaBoost: Adaptive Boosting.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>XGBoost: Extreme Gradient Boosting.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>GBM: gradient boosting machine.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>Indicates the machine learning model with the best predictive performance.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Association Between Risk Factors and Occurrence of Hypertension</title>
        <p>The association between the occurrence of hypertension and potential risk factors is presented in Tables S4 and S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. In both the discovery and validation cohorts, consistently, the multivariable model revealed that older age, female sex, urban residence, high income, high blood pressure, high serum total cholesterol, high hemoglobin, high AST, high γ-GTP, high BMI, history of diabetes mellitus, history of stroke, frequent alcohol intake, and insufficient physical activity were significantly associated with an increased risk of hypertension.</p>
      </sec>
      <sec>
        <title>AI-Driven Web Application</title>
        <p>Our proposed ensemble model was deployed on our own public website [<xref ref-type="bibr" rid="ref30">30</xref>] so that hypertension onset within 5 years can be predicted based on regular health checkup data. The deployed web application, which provides results for prediction of hypertension onset, is shown in Figure S1 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. The web interface for entering information on 18 features from regular health checkup data is shown in Figure S1(a) in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. After entering the information in the web application, a user can immediately obtain the results for prediction of hypertension onset with its probability, as shown in Figure S1(b) in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. In the web application, the features input by a user are encoded to the website server, and immediately deleted upon generation of the prediction result, so that there is no risk of exposing information. In addition, there is no need to enter any information that would be regarded as private. Furthermore, we have open-sourced the Python code for the proposed ensemble model as publicly available in a GitHub repository [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Main Findings</title>
        <p>Given the significant health and economic consequences of CVDs, particularly myocardial infarction and stroke, it is essential to examine hypertension, a principal contributing factor to these conditions. This study uses data from 244,814 South Korean participants, obtained from the NHIS-NSC over a 12-year study period, and data from 1,296,649 Japanese participants, collected by the JMDC from various health insurance associations in Japan since 2005.</p>
        <p>Our findings indicated that an ensemble of AdaBoost and LR models provided superior performance, achieving a sensitivity of 80.62%, specificity of 81.79%, balanced accuracy of 81.2%, and AUROC of 90.12%, suggesting that quantifying the occurrence of hypertension using feature importance analysis with ensemble machine learning (AdaBoost and LR) can enhance generalizability and reproducibility.</p>
        <p>Using the machine learning model, our study analyzed the occurrence of hypertension. Feature importance analysis identified the top 5 contributing features of hypertension, which were age, diastolic blood pressure, BMI, systolic blood pressure, and fasting blood glucose. Following the feature importance analysis, we conducted an ablation study in which some of the top 5 contributing features were excluded to measure the impact of these contributing features of hypertension.</p>
        <p>From our further investigation into the association between hypertension and independent variables, we analyzed various risk factors. Our analysis revealed that older age, female sex, urban residence, high income, elevated blood pressure, high serum total cholesterol, elevated hemoglobin, high AST, high γ-GTP, high BMI, history of diabetes mellitus, history of stroke, frequent alcohol intake, and insufficient physical activity were significantly associated with an increased risk of hypertension.</p>
        <p>After obtaining the test data results, we performed extra validation using the JMDC dataset and confirmed that our ensemble model combining AdaBoost and LR could provide an accurate prediction of hypertension within 5 years based on regular health checkup data (balanced accuracy 0.741 and AUROC 0.824). Based on these analyses of the NHIS-NSC (original validation) and the JMDC (extra validation), our study has established a web application allowing diagnosis of hypertension [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>].</p>
      </sec>
      <sec>
        <title>Comparison With Previous Studies</title>
        <p>Similar to our study, past research efforts have worked on developing hypertension risk prediction models using variables akin to our study, including age, sex, BMI, blood pressure metrics, parental hypertension history, smoking habits, and in certain cases, additional markers such as C-reactive protein, apolipoprotein A, and uric acid [<xref ref-type="bibr" rid="ref35">35</xref>] (United States, n=1717 [<xref ref-type="bibr" rid="ref36">36</xref>], n=1130 [<xref ref-type="bibr" rid="ref37">37</xref>], n=15,732 [<xref ref-type="bibr" rid="ref38">38</xref>], n=876 [<xref ref-type="bibr" rid="ref39">39</xref>], and n=23,095 [<xref ref-type="bibr" rid="ref40">40</xref>]; United Kingdom, n=10,308 [<xref ref-type="bibr" rid="ref41">41</xref>]; and Iran, n=380 [<xref ref-type="bibr" rid="ref42">42</xref>]).</p>
        <p>Although several prior studies have sought to predict the occurrence of hypertension and establish web applications, many of them had limitations, such as low levels of reliability and conflicting results. These constraints can be attributed to smaller sample sizes, short follow-up durations, and inadequate study designs such as nonrepresentative or nonrandom selection of populations [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Additionally, most of these studies did not include processes to strengthen their web applications, such as extra validation studies.</p>
        <p>Our research stands apart in this context. We used a longitudinal approach using extensive datasets from both South Korean and Japanese health insurance databases, encompassing a comprehensive range of hypertension-related data spanning over 10 years. By leveraging datasets from 244,814 individuals in South Korea and 1,296,649 in Japan, we implemented a 5-fold cross-validation for optimizing an ensemble machine learning model. This was followed by a feature importance analysis to identify the top 5 determinants of hypertension, an ablation study to gauge the significance of each contributing factor, and an additional validation procedure. As a result, our work culminated in the development of a robust machine learning-powered web application, a milestone that many preceding studies fell short of achieving.</p>
      </sec>
      <sec>
        <title>Possible Explanations for Our Results</title>
        <p>This study harnesses real-world data where conventional statistical methods often struggle to guarantee generalizability and reproducibility in real-life situations. However, such challenges can be surmounted with AI-powered machine learning techniques such as variable pruning and group optimization.</p>
        <p>By integrating machine learning methodologies, specifically AdaBoost and LR, our research can perpetually evaluate potential features linked to hypertension onset. This translates to a resilient system adept at observing the correlation between standardized traits and hypertension episodes, which include age, diastolic blood pressure, BMI, systolic blood pressure, and fasting blood glucose concentrations. Notably, this strategy offers considerable benefits, ensuring dependable data on hypertension prevalence across a wide demographic, even if the analysis encompasses merely a fraction of the overall populace. Additionally, our pioneering methodology offers a distinct advantage by ensuring accessibility even for individuals who may be illiterate or disinclined to participate in hypertension-specific screenings. This capacity for rapid diagnostic evaluation equips health care professionals with the tools to offer more targeted and accurate services to patients facing hypertension risks.</p>
      </sec>
      <sec>
        <title>Policy Implication</title>
        <p>Our diagnostic method’s validation, achieved via an ensemble machine learning strategy integrating AdaBoost and LR, consistently upholds accuracy in hypertension identification, even among newly discerned populations potentially susceptible to hypertension. This tool not only paves the way for preemptive hypertension identification but also extends its reach to individuals distant from conventional health care infrastructure, such as hospitals and regional health centers. Our study is keen on transitioning our web-based platform to a mobile app [<xref ref-type="bibr" rid="ref18">18</xref>], addressing and eliminating any accessibility barriers. Such an evolution positions our tool as a universally accessible resource, irrespective of an individual’s socioeconomic status, domicile, or the developmental index of their nation. Worldwide, national administrations can advocate for our tool, empowering citizens to independently gauge their hypertension risk and pursue timely medical interventions. The distinct advantage of our platform is its avoidance of potential diplomatic sensitivities, given its nonreliance on any personal or confidential data.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>An astute examination of this study’s findings calls for recognizing inherent limitations. To elaborate, even though our research draws on data from 2 distinct cohorts—the NHIS-NSC (n=244,814) for training or testing and the JMDC (n=1,296,649) for extra-validation—these datasets encompass but a marginal segment of the overarching Asian demographic, and an even lesser representation of the worldwide populace. This fact accentuates the imperative for our conclusions to be subjected to broader international validation studies and exhaustive longitudinal investigations. Furthermore, although the sample size of the JMDC is larger, its somewhat limited set of variables led us to develop a model using the comprehensive set of variables available in the NHIS-NSC [<xref ref-type="bibr" rid="ref43">43</xref>]. Despite the smaller size of the NHIS-NSC, our proposed ensemble model showed stable and consistent performance when validated with the JMDC. Moreover, it is crucial to acknowledge that our study tested a limited array of model types, excluding machine learning models such as k-nearest neighbors and support vector machines. Including these models would have provided a comprehensive comparison and potentially strengthened our findings. Additionally, our analysis did not include certain hypertension-related features, such as family history, dietary habits, and salt consumption. As it is well known that these factors play a significant role in the development and progression of hypertension, their absence may have influenced the predictive power of the models and the holistic understanding of hypertension risk factors. Lastly, segmenting related variables such as systolic and diastolic pressure, ALT, and AST can capture diverse aspects and reduce data loss, but it may dilute significance due to their correlation [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        <p>While the primary aim of our study has been to identify predictors for the onset of hypertension, we acknowledge that predicting the magnitude of blood pressure increases offers an invaluable perspective on the complex interplay between initial blood pressure levels and their changes over time. This area, although not explored within the current scope of our research, holds significant potential for advancing our understanding of hypertension. Future investigations that include baseline blood pressure measurements could yield profound insights into the risk factors and dynamics of blood pressure changes. Such research would enrich our predictive models and refine management strategies for hypertension, marking a crucial step forward in the field.</p>
        <p>Yet, amid these confines, one must not underestimate this study’s significance. Our endeavor capitalizes on data meticulously gathered for over a decade from South Korea and Japan. In a methodical exercise of comparing a spectrum of 6 machine learning models and subsequently analyzing ensemble variations, we astutely pinpointed the critical determinants closely aligned with hypertension onset, ensuring commendable reproducibility and applicability. Furthermore, the genesis of a user-responsive web tool, facilitating individuals to input personal health metrics, epitomizes our groundbreaking stride toward expeditious, precision-driven, and worldwide accessible diagnostic avenues for hypertension.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In a pioneering endeavor, this research uniquely integrates both machine learning and conventional statistical frameworks to prognosticate the emergence of hypertension. A notable outcome of this exploration is the institution of a digital platform adept at forecasting a 5-year onset of hypertension, using data sourced from the NHIS-NSC and JMDC. Our empirical outcomes, extrapolated from 2 autonomous studies, substantiate that machine learning paradigms—particularly an amalgamation of AdaBoost and LR—eclipsed the traditional statistical methodologies in preempting hypertension onset. A meticulous inquiry was undertaken to ascertain the hierarchical significance of determinants linked with hypertension. The investigation earmarked age as the paramount factor, trailed by diastolic blood pressure, BMI, systolic blood pressure, and fasting blood glucose concentrations. This sequence, pivotal in curating the most efficacious machine learning model and subsequent hypertension emergence, was further corroborated via supplementary validation harnessing the JMDC datasets. Emerging from these discernments is an AI-infused digital interface, proficient in envisioning a quintennial likelihood of hypertension based on routine health assessment metrics. Such an innovation positions itself as an instrumental diagnostic conduit for individuals predisposed to hypertension.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Sensitivity analysis.</p>
        <media xlink:href="jmir_v26i1e52794_app1.docx" xlink:title="DOCX File, 52 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>AI-driven web application. AI: artificial intelligence.</p>
        <media xlink:href="jmir_v26i1e52794_app2.docx" xlink:title="DOCX File, 376 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ALT</term>
          <def>
            <p>alanine transaminase</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AST</term>
          <def>
            <p>aspartate transaminase</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CVD</term>
          <def>
            <p>cardiovascular disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">GBM</term>
          <def>
            <p>gradient boosting machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ICD-10</term>
          <def>
            <p>International Classification of Diseases, 10th Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">JMDC</term>
          <def>
            <p>Japanese Medical Data Center cohort</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MDI</term>
          <def>
            <p>mean decrease in impurity</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NHIS-NSC</term>
          <def>
            <p>National Health Insurance Service-National Sample Cohort</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">γ-GTP</term>
          <def>
            <p>γ-glutamyl transpeptidase</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>No generative AI was used in any portion of this study. This research was supported by grants from the National Research Foundation of Korea, funded by the Korean Government (Ministry of Science and ICT: MSIT, ICT: Information and Communication Technology; RS-2023-00248157) and the MSIT, Korea, under the Information Technology Research Center (ITRC) support program (IITP-2024-RS-2024-00438239) supervised by the IITP (Institute for Information &#38; Communications Technology Planning &#38; Evaluation). The funders had no role in study design, data collection, data analysis, data interpretation, or writing of the report.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>Data are available on reasonable request to the corresponding authors and with appropriate ethical approvals.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>JL and DKY had full access to all of the data in this study and took responsibility for the integrity of the data and the accuracy of the data analysis. All authors approved the final version before submission. ML, AK, and LS reviewed this paper. SHH, HL, JHL, DKY, and JL were responsible for the study concept and design; worked on the acquisition, analysis, or interpretation of data; drafted this paper; and handled the statistical analysis. All authors critically revised this paper for important intellectual content. JL supervised this study and is the guarantor for this study. DKY and JL contributed equally to this study as corresponding authors. SHH, HL, and JHL contributed equally to this work as first authors. The corresponding author attests that all listed authors meet authorship criteria and that no others meeting the criteria have been omitted.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghani</surname>
              <given-names>MAA</given-names>
            </name>
            <name name-style="western">
              <surname>Ugusman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Latip</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zainalabidin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Role of terpenophenolics in modulating inflammation and apoptosis in cardiovascular diseases: a review</article-title>
          <source>Int J Mol Sci</source>
          <year>2023</year>
          <volume>24</volume>
          <issue>6</issue>
          <fpage>5339</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijms24065339"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijms24065339</pub-id>
          <pub-id pub-id-type="medline">36982410</pub-id>
          <pub-id pub-id-type="pii">ijms24065339</pub-id>
          <pub-id pub-id-type="pmcid">PMC10049039</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HC</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of cardiovascular disease and its risk factors in Korea</article-title>
          <source>Glob Health Med</source>
          <year>2021</year>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>134</fpage>
          <lpage>141</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34250288"/>
          </comment>
          <pub-id pub-id-type="doi">10.35772/ghm.2021.01008</pub-id>
          <pub-id pub-id-type="medline">34250288</pub-id>
          <pub-id pub-id-type="pmcid">PMC8239378</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arbogast</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Moore-Schiltz</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jarvis</surname>
              <given-names>WR</given-names>
            </name>
            <name name-style="western">
              <surname>Harpster-Hagen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Parker</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Impact of a comprehensive workplace hand hygiene program on employer health care insurance claims and costs, absenteeism, and employee perceptions and practices</article-title>
          <source>J Occup Environ Med</source>
          <year>2016</year>
          <volume>58</volume>
          <issue>6</issue>
          <fpage>e231</fpage>
          <lpage>e240</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27281645"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/JOM.0000000000000738</pub-id>
          <pub-id pub-id-type="medline">27281645</pub-id>
          <pub-id pub-id-type="pii">00043764-201606000-00025</pub-id>
          <pub-id pub-id-type="pmcid">PMC4883643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>NCD Risk Factor Collaboration (NCD-RisC)</collab>
          </person-group>
          <article-title>Worldwide trends in hypertension prevalence and progress in treatment and control from 1990 to 2019: a pooled analysis of 1201 population-representative studies with 104 million participants</article-title>
          <source>Lancet</source>
          <year>2021</year>
          <volume>398</volume>
          <issue>10304</issue>
          <fpage>957</fpage>
          <lpage>980</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/2318/1805296"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(21)01330-1</pub-id>
          <pub-id pub-id-type="medline">34450083</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(21)01330-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC8446938</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Braveman</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gottlieb</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>The social determinants of health: it's time to consider the causes of the causes</article-title>
          <source>Public Health Rep</source>
          <year>2014</year>
          <volume>129</volume>
          <issue>Suppl 2</issue>
          <fpage>19</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/24385661"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/00333549141291S206</pub-id>
          <pub-id pub-id-type="medline">24385661</pub-id>
          <pub-id pub-id-type="pmcid">PMC3863696</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Koyanagi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Fond</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Boyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rahmati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rhee</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Trends in hypertension prevalence, awareness, treatment, and control in South Korea, 1998-2021: a nationally representative serial study</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>21724</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-023-49055-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-023-49055-8</pub-id>
          <pub-id pub-id-type="medline">38066091</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-49055-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10709599</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Son</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tully</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Rahmati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hwang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rhee</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Prevalence, awareness, treatment, and control of type 2 diabetes in South Korea (1998 to 2022): nationwide cross-sectional study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2024</year>
          <volume>10</volume>
          <fpage>e59571</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2024/1/e59571/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/59571</pub-id>
          <pub-id pub-id-type="medline">39190907</pub-id>
          <pub-id pub-id-type="pii">v10i1e59571</pub-id>
          <pub-id pub-id-type="pmcid">PMC11387923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>IS</given-names>
            </name>
            <name name-style="western">
              <surname>Hur</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Huh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>SU</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Prognostic artificial intelligence model to predict 5 year survival at 1 year after gastric cancer surgery based on nutrition and body morphometry</article-title>
          <source>J Cachexia Sarcopenia Muscle</source>
          <year>2023</year>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>847</fpage>
          <lpage>859</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36775841"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/jcsm.13176</pub-id>
          <pub-id pub-id-type="medline">36775841</pub-id>
          <pub-id pub-id-type="pmcid">PMC10067496</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Koh</surname>
              <given-names>HY</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>MY</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Incidence of cancer after asthma development: 2 independent population-based cohort studies</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2021</year>
          <volume>147</volume>
          <issue>1</issue>
          <fpage>135</fpage>
          <lpage>143</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2020.04.041</pub-id>
          <pub-id pub-id-type="medline">32417133</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(20)30643-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>HY</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Won</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Kronbichler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Koyanagi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jacob</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Suh</surname>
              <given-names>DI</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Autoimmune inflammatory rheumatic diseases and COVID-19 outcomes in South Korea: a nationwide cohort study</article-title>
          <source>Lancet Rheumatol</source>
          <year>2021</year>
          <volume>3</volume>
          <issue>10</issue>
          <fpage>e698</fpage>
          <lpage>e706</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34179832"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2665-9913(21)00151-X</pub-id>
          <pub-id pub-id-type="medline">34179832</pub-id>
          <pub-id pub-id-type="pii">S2665-9913(21)00151-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC8213376</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>EK</given-names>
            </name>
          </person-group>
          <article-title>Cardiovascular research using the Korean national health information database</article-title>
          <source>Korean Circ J</source>
          <year>2020</year>
          <volume>50</volume>
          <issue>9</issue>
          <fpage>754</fpage>
          <lpage>772</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32725984"/>
          </comment>
          <pub-id pub-id-type="doi">10.4070/kcj.2020.0171</pub-id>
          <pub-id pub-id-type="medline">32725984</pub-id>
          <pub-id pub-id-type="pii">50.e90</pub-id>
          <pub-id pub-id-type="pmcid">PMC7441000</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nagai</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kodaira</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kimura</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Takahashi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nakayama</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Data resource profile: JMDC claims database sourced from health insurance societies</article-title>
          <source>J Gen Fam Med</source>
          <year>2021</year>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>118</fpage>
          <lpage>127</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33977008"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/jgf2.422</pub-id>
          <pub-id pub-id-type="medline">33977008</pub-id>
          <pub-id pub-id-type="pii">JGF2422</pub-id>
          <pub-id pub-id-type="pmcid">PMC8090843</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Koyanagi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fond</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Boyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rahmati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>López Sánchez</surname>
              <given-names>GF</given-names>
            </name>
            <name name-style="western">
              <surname>Dragioti</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cortese</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Suh</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Solmi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Min</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Fusar-Poli</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Short- and long-term neuropsychiatric outcomes in long COVID in South Korea and Japan</article-title>
          <source>Nat Hum Behav</source>
          <year>2024</year>
          <volume>8</volume>
          <issue>8</issue>
          <fpage>1530</fpage>
          <lpage>1544</lpage>
          <pub-id pub-id-type="doi">10.1038/s41562-024-01895-8</pub-id>
          <pub-id pub-id-type="medline">38918517</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-024-01895-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Koyanagi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Rahmati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Acute and post-acute respiratory complications of SARS-CoV-2 infection: population-based cohort study in South Korea and Japan</article-title>
          <source>Nat Commun</source>
          <year>2024</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>4499</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-024-48825-w"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-024-48825-w</pub-id>
          <pub-id pub-id-type="medline">38802352</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-024-48825-w</pub-id>
          <pub-id pub-id-type="pmcid">PMC11130304</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>IK</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Ha</surname>
              <given-names>EK</given-names>
            </name>
            <name name-style="western">
              <surname>Yeniova</surname>
              <given-names>AÖ</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Proton pump inhibitors and the risk of severe COVID-19: a post-hoc analysis from the Korean nationwide cohort</article-title>
          <source>Gut</source>
          <year>2021</year>
          <volume>70</volume>
          <issue>10</issue>
          <fpage>2013</fpage>
          <lpage>2015</lpage>
          <pub-id pub-id-type="doi">10.1136/gutjnl-2020-323672</pub-id>
          <pub-id pub-id-type="medline">33303566</pub-id>
          <pub-id pub-id-type="pii">gutjnl-2020-323672</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>WP</given-names>
            </name>
          </person-group>
          <article-title>SMOTE: synthetic minority over-sampling technique</article-title>
          <source>J Artif Intell Res</source>
          <year>2002</year>
          <volume>16</volume>
          <fpage>321</fpage>
          <lpage>357</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rhee</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Min</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Quantification of identifying cognitive impairment using olfactory-stimulated functional near-infrared spectroscopy with machine learning: a post hoc analysis of a diagnostic trial and validation of an external additional trial</article-title>
          <source>Alzheimers Res Ther</source>
          <year>2023</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>127</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://alzres.biomedcentral.com/articles/10.1186/s13195-023-01268-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13195-023-01268-9</pub-id>
          <pub-id pub-id-type="medline">37481573</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13195-023-01268-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC10362671</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Machine learning-based prediction of suicidality in adolescents during the COVID-19 pandemic (2020-2021): derivation and validation in two independent nationwide cohorts</article-title>
          <source>Asian J Psychiatr</source>
          <year>2023</year>
          <volume>88</volume>
          <fpage>103704</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ajp.2023.103704</pub-id>
          <pub-id pub-id-type="medline">37541104</pub-id>
          <pub-id pub-id-type="pii">S1876-2018(23)00260-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Asghari Varzaneh</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shanbehzadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kazemi-Arpanahi</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Prediction of successful aging using ensemble machine learning algorithms</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2022</year>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>258</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-022-02001-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-022-02001-6</pub-id>
          <pub-id pub-id-type="medline">36192713</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-022-02001-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC9527392</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Machine learning to predict the occurrence of thyroid nodules: towards a quantitative approach for judicious utilization of thyroid ultrasonography</article-title>
          <source>Front Endocrinol (Lausanne)</source>
          <year>2024</year>
          <volume>15</volume>
          <fpage>1385836</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38774231"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fendo.2024.1385836</pub-id>
          <pub-id pub-id-type="medline">38774231</pub-id>
          <pub-id pub-id-type="pmcid">PMC11106422</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rainio</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Teuho</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Klén</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Evaluation metrics and statistical tests for machine learning</article-title>
          <source>Sci Rep</source>
          <year>2024</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>6086</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-024-56706-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-024-56706-x</pub-id>
          <pub-id pub-id-type="medline">38480847</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-024-56706-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC10937649</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fond</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Boyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Machine learning-based prediction of suicidality in adolescents with allergic rhinitis: derivation and validation in 2 independent nationwide cohorts</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <volume>26</volume>
          <fpage>e51473</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e51473/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51473</pub-id>
          <pub-id pub-id-type="medline">38354043</pub-id>
          <pub-id pub-id-type="pii">v26i1e51473</pub-id>
          <pub-id pub-id-type="pmcid">PMC10902766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Basu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kumbier</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A debiased MDI feature importance measure for random forests</article-title>
          <year>2019</year>
          <conf-name>Proceedings of the 33rd International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>2019 Dec 08</conf-date>
          <conf-loc>Vancouver, BC, Canada</conf-loc>
          <fpage>8049</fpage>
          <lpage>8059</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Louppe</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wehenkel</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sutera</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Geurts</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Understanding variable importances in forests of randomized trees</article-title>
          <year>2013</year>
          <conf-name>Proceedings of the 26th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>2013 Dec 05</conf-date>
          <conf-loc>United States</conf-loc>
          <fpage>431</fpage>
          <lpage>439</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mathew</surname>
              <given-names>TE</given-names>
            </name>
          </person-group>
          <article-title>A logistic regression with recursive feature elimination model for breast cancer diagnosis</article-title>
          <source>Int J Emerging Technol</source>
          <year>2019</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>55</fpage>
          <lpage>63</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kawasoe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kubozono</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Salim</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshimine</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Mawatari</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ojima</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawabata</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ikeda</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Miyahara</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tokushige</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ido</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ohishi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Development of a risk prediction score and equation for chronic kidney disease: a retrospective cohort study</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>5001</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-023-32279-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-023-32279-z</pub-id>
          <pub-id pub-id-type="medline">36973534</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-32279-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC10042816</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>HG</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Koyanagi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jacob</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Min</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Fond</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Boyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Joo</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Yeo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rhee</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>National trends in sadness, suicidality, and COVID-19 pandemic-related risk factors among South Korean adolescents from 2005 to 2021</article-title>
          <source>JAMA Netw Open</source>
          <year>2023</year>
          <volume>6</volume>
          <issue>5</issue>
          <fpage>e2314838</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37223902"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.14838</pub-id>
          <pub-id pub-id-type="medline">37223902</pub-id>
          <pub-id pub-id-type="pii">2805276</pub-id>
          <pub-id pub-id-type="pmcid">PMC10209749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
          </person-group>
          <article-title>Regression analysis for continuous independent variables in medical research: statistical standard and guideline of life cycle committee</article-title>
          <source>Life Cycle</source>
          <year>2022</year>
          <volume>2</volume>
          <fpage>e3</fpage>
          <pub-id pub-id-type="doi">10.54724/lc.2022.e3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Machine learning to predict end stage kidney disease in chronic kidney disease</article-title>
          <source>Sci Rep</source>
          <year>2022</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>8377</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-022-12316-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-022-12316-z</pub-id>
          <pub-id pub-id-type="medline">35589908</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-022-12316-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC9120106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <source>Predicting hypertension with health checkup</source>
          <access-date>2024-10-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://ai-wm.khu.ac.kr/Hypertension/">http://ai-wm.khu.ac.kr/Hypertension/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>seunghahh / PredictingHypertension</article-title>
          <source>GitHub</source>
          <access-date>2024-10-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/seunghahh/PredictingHypertension">https://github.com/seunghahh/PredictingHypertension</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sáez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ferri</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>García-Gómez</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Resilient artificial intelligence in health: synthesis and research agenda toward next-generation trustworthy clinical decision support</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <volume>26</volume>
          <fpage>e50295</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e50295/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/50295</pub-id>
          <pub-id pub-id-type="medline">38941134</pub-id>
          <pub-id pub-id-type="pii">v26i1e50295</pub-id>
          <pub-id pub-id-type="pmcid">PMC11245653</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>YQ</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>DX</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>LY</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jing</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>ZW</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>GW</given-names>
            </name>
          </person-group>
          <article-title>Pitfalls in developing machine learning models for predicting cardiovascular diseases: challenge and solutions</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <volume>26</volume>
          <fpage>e47645</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e47645/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/47645</pub-id>
          <pub-id pub-id-type="medline">38869157</pub-id>
          <pub-id pub-id-type="pii">v26i1e47645</pub-id>
          <pub-id pub-id-type="pmcid">PMC11316160</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Son</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hammoodi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fond</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Boyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <article-title>Machine learning-based prediction of suicidal thinking in adolescents by derivation and validation in 3 independent worldwide cohorts: algorithm development and validation study</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <volume>26</volume>
          <fpage>e55913</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e55913/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/55913</pub-id>
          <pub-id pub-id-type="medline">38758578</pub-id>
          <pub-id pub-id-type="pii">v26i1e55913</pub-id>
          <pub-id pub-id-type="pmcid">PMC11143390</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Echouffo-Tcheugui</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Batty</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Kivimäki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kengne</surname>
              <given-names>AP</given-names>
            </name>
          </person-group>
          <article-title>Risk models to predict hypertension: a systematic review</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>e67370</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0067370"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0067370</pub-id>
          <pub-id pub-id-type="medline">23861760</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-08187</pub-id>
          <pub-id pub-id-type="pmcid">PMC3702558</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Parikh</surname>
              <given-names>NI</given-names>
            </name>
            <name name-style="western">
              <surname>Pencina</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Benjamin</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lanier</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>D'Agostino</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Kannel</surname>
              <given-names>WB</given-names>
            </name>
            <name name-style="western">
              <surname>Vasan</surname>
              <given-names>RS</given-names>
            </name>
          </person-group>
          <article-title>A risk score for predicting near-term incidence of hypertension: the Framingham heart study</article-title>
          <source>Ann Intern Med</source>
          <year>2008</year>
          <volume>148</volume>
          <issue>2</issue>
          <fpage>102</fpage>
          <lpage>110</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-148-2-200801150-00005</pub-id>
          <pub-id pub-id-type="medline">18195335</pub-id>
          <pub-id pub-id-type="pii">148/2/102</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>LaCroix</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Mead</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>KY</given-names>
            </name>
          </person-group>
          <article-title>The prediction of midlife coronary heart disease and hypertension in young adults: the Johns Hopkins multiple risk equations</article-title>
          <source>Am J Prev Med</source>
          <year>1990</year>
          <volume>6</volume>
          <issue>2 Suppl</issue>
          <fpage>23</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="medline">2383409</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kshirsagar</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bomback</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>August</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Viera</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Colindres</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A hypertension risk score for middle-aged and older adults</article-title>
          <source>J Clin Hypertens (Greenwich)</source>
          <year>2010</year>
          <volume>12</volume>
          <issue>10</issue>
          <fpage>800</fpage>
          <lpage>808</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21029343"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1751-7176.2010.00343.x</pub-id>
          <pub-id pub-id-type="medline">21029343</pub-id>
          <pub-id pub-id-type="pmcid">PMC3683833</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fitriyani</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Syafrudin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alfian</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rhee</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Development of disease prediction model based on ensemble learning approach for diabetes and hypertension</article-title>
          <source>IEEE Access</source>
          <year>2019</year>
          <volume>7</volume>
          <fpage>144777</fpage>
          <lpage>144789</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2019.2945129</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elshawi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Mallah</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Sakr</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>On the interpretability of machine learning-based model for predicting hypertension</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>146</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0874-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0874-0</pub-id>
          <pub-id pub-id-type="medline">31357998</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0874-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6664803</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kivimäki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Batty</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Singh-Manoux</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrie</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Tabak</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Jokela</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Marmot</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Shipley</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Validating the Framingham hypertension risk score: results from the Whitehall II study</article-title>
          <source>Hypertension</source>
          <year>2009</year>
          <volume>54</volume>
          <issue>3</issue>
          <fpage>496</fpage>
          <lpage>501</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19597041"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/HYPERTENSIONAHA.109.132373</pub-id>
          <pub-id pub-id-type="medline">19597041</pub-id>
          <pub-id pub-id-type="pii">HYPERTENSIONAHA.109.132373</pub-id>
          <pub-id pub-id-type="pmcid">PMC2828464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gorbani</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mahmoodi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sarbakhsh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shaghaghi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Predictive performance of Pender's health promotion model for hypertension control in Iranian patients</article-title>
          <source>Vasc Health Risk Manag</source>
          <year>2020</year>
          <volume>16</volume>
          <fpage>299</fpage>
          <lpage>305</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32764950"/>
          </comment>
          <pub-id pub-id-type="doi">10.2147/VHRM.S258458</pub-id>
          <pub-id pub-id-type="medline">32764950</pub-id>
          <pub-id pub-id-type="pii">258458</pub-id>
          <pub-id pub-id-type="pmcid">PMC7381821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Watada</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kawakita</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nishigaki</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nakajima</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Fujikawa</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Akazawa</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A claims-based cohort study on the treatment patterns of Japanese patients with type 2 diabetes mellitus and the association of early first physician visit with time to prescription of oral hypoglycemic agents</article-title>
          <source>Diabetes Ther</source>
          <year>2021</year>
          <volume>12</volume>
          <issue>7</issue>
          <fpage>2035</fpage>
          <lpage>2047</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34151415"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13300-021-01090-2</pub-id>
          <pub-id pub-id-type="medline">34151415</pub-id>
          <pub-id pub-id-type="pii">10.1007/s13300-021-01090-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC8266984</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>HY</given-names>
            </name>
            <name name-style="western">
              <surname>Yon</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Effenberger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Koyanagi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jacob</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>IK</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SW</given-names>
            </name>
          </person-group>
          <article-title>Non-alcoholic fatty liver disease and COVID-19 susceptibility and outcomes: a Korean nationwide cohort</article-title>
          <source>J Korean Med Sci</source>
          <year>2021</year>
          <volume>36</volume>
          <issue>41</issue>
          <fpage>e291</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jkms.org/DOIx.php?id=10.3346/jkms.2021.36.e291"/>
          </comment>
          <pub-id pub-id-type="doi">10.3346/jkms.2021.36.e291</pub-id>
          <pub-id pub-id-type="medline">34697932</pub-id>
          <pub-id pub-id-type="pii">36.e291</pub-id>
          <pub-id pub-id-type="pmcid">PMC8546310</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
