<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v27i1e71592</article-id><article-id pub-id-type="doi">10.2196/71592</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Biological Age Estimation From the Age Gap Using Deep Learning Integrating Morbidity and Mortality: Model Development and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Moon</surname><given-names>Seong-Eun</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Yoon</surname><given-names>Ji Won</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Bae</surname><given-names>Jae 
Hyun</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Joo</surname><given-names>Shinyoung</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kim</surname><given-names>Yoo Hyung</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Bon Hyang</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yoon</surname><given-names>Seokho</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Yoo</surname><given-names>Haanju</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Cho</surname><given-names>Young Min</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>NAVER AI Lab</institution><addr-line>Seongnam</addr-line><country>Republic of Korea</country></aff><aff id="aff2"><institution>Division of Endocrinology and Metabolism, Department of Internal Medicine, Seoul National University Hospital Healthcare System Gangnam Center</institution><addr-line>Seoul</addr-line><country>Republic of Korea</country></aff><aff id="aff3"><institution>Department of 
Internal Medicine, Seoul National University College of Medicine</institution><addr-line>101 Daehak-ro, Jongno-gu</addr-line><addr-line>Seoul</addr-line><country>Republic of Korea</country></aff><aff id="aff4"><institution>Division of Endocrinology and Metabolism, Department of Internal Medicine, Seoul National University Hospital</institution><addr-line>Seoul</addr-line><country>Republic of Korea</country></aff><aff id="aff5"><institution>DaNaA Data</institution><addr-line>Seoul</addr-line><country>Republic of Korea</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Leung</surname><given-names>Tiffany</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Okami</surname><given-names>Suguru</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Hartonen</surname><given-names>Tuomo</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Young Min Cho, MD, PhD, Department of Internal Medicine, Seoul National University College of Medicine, 101 Daehak-ro, Jongno-gu, Seoul, 03080, Republic of Korea, 82 0220721965; <email>ymchomd@snu.ac.kr</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>10</day><month>9</month><year>2025</year></pub-date><volume>27</volume><elocation-id>e71592</elocation-id><history><date date-type="received"><day>22</day><month>01</month><year>2025</year></date><date date-type="rev-recd"><day>20</day><month>06</month><year>2025</year></date><date 
date-type="accepted"><day>20</day><month>06</month><year>2025</year></date></history><copyright-statement>&#x00A9; Seong-Eun Moon, Ji Won Yoon, Jae Hyun Bae, Shinyoung Joo, Yoo Hyung Kim, Bon Hyang Lee, Seokho Yoon, Haanju Yoo, Young Min Cho. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 10.9.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e71592"/><abstract><sec><title>Background</title><p>Biological age (BA) is increasingly recognized as a valuable alternative to chronological age (CA) for assessing an individual&#x2019;s health and aging status. 
However, existing models are based on limited clinical parameters and have not thoroughly integrated morbidity and mortality data.</p></sec><sec><title>Objective</title><p>This study aimed to develop and validate a novel transformer-based model, referred to as the BA &#x2013; CA gap model, for BA estimation that incorporates morbidity and mortality information to improve predictive accuracy and enhance clinical use in the early identification of the risk of age-related diseases.</p></sec><sec sec-type="methods"><title>Methods</title><p>We retrospectively analyzed data from 151,281 adults aged 18 years or older who underwent routine health checkups between 2003 and 2020. Participants were classified into normal, predisease, and disease groups based on comorbidities (diabetes mellitus, hypertension, and dyslipidemia) to evaluate the model&#x2019;s ability to discriminate health status along a clinically relevant spectrum. Variables with less than 50% missingness had missing values imputed using the mean, while features with 50% or more missingness were excluded. We developed a custom transformer architecture that learns multiple objectives simultaneously, including input feature reconstruction, BA and CA alignment, health status discrimination, and mortality prediction. Model training used unsupervised and self-supervised strategies. We compared our model&#x2019;s performance with conventional BA estimation approaches, including Klemera and Doubal&#x2019;s method, a CA cluster-based model, and a deep neural network, by examining BA gap distributions, health status stratification, and mortality prediction.</p></sec><sec sec-type="results"><title>Results</title><p>The proposed BA &#x2013; CA gap model provided a more accurate reflection of health status and superior stratification of mortality risk than existing methods. The model effectively distinguished among normal, predisease, and disease groups, with a clear gradient of BA gap values. 
Kaplan-Meier analyses demonstrated stronger discrimination of future mortality in men, while a similar but not statistically significant trend was observed in women. Sensitivity analyses across multiple random splits and training subsets confirmed the robustness of the model&#x2019;s performance.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>By integrating morbidity and mortality information within a transformer-based framework, the BA &#x2013; CA gap model offers a more granular and clinically meaningful assessment of aging and health status than CA alone. This approach supports the potential for personalized health management and risk stratification, although external validation in diverse populations is warranted to further confirm its generalizability.</p></sec></abstract><kwd-group><kwd>aging</kwd><kwd>deep learning</kwd><kwd>health status</kwd><kwd>morbidity</kwd><kwd>mortality</kwd><kwd>risk assessment</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Biological aging is a critical determinant of functional decline, age-related diseases, and mortality, distinct from chronological aging, which simply measures time since birth [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Biological age (BA) is shaped by a complex interplay of genetics, environmental exposures, modifiable lifestyle factors, and chronic diseases, making it highly individualized and multifaceted [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. 
Distinguishing BA from chronological age (CA) has important implications for preventive health care [<xref ref-type="bibr" rid="ref5">5</xref>], risk stratification for frailty [<xref ref-type="bibr" rid="ref6">6</xref>], and extending healthy lifespan [<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Over recent decades, numerous methods have been developed to estimate BA, ranging from composite indices of clinical markers [<xref ref-type="bibr" rid="ref6">6</xref>] to advanced models using omics data [<xref ref-type="bibr" rid="ref8">8</xref>] and machine learning techniques [<xref ref-type="bibr" rid="ref9">9</xref>]. Although these methods have proven valuable in aging research, they face notable challenges. Traditional models frequently use CA as the principal anchor, assuming a close alignment between BA and CA, which limits their applicability across diverse populations and health statuses [<xref ref-type="bibr" rid="ref10">10</xref>]. Additionally, many models are trained exclusively on healthy or homogeneous cohorts, raising concerns about their generalizability to individuals with multimorbidity or differing baseline risks [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Recent advances in artificial intelligence (AI), such as deep learning, offer significant promise for modeling the nonlinear and high-dimensional relationships inherent in biological aging [<xref ref-type="bibr" rid="ref12">12</xref>]. While these approaches have shown potential for assessing age-related outcomes using clinical data [<xref ref-type="bibr" rid="ref13">13</xref>], many published BA estimation models do not explicitly incorporate morbidity or mortality data during model training [<xref ref-type="bibr" rid="ref14">14</xref>]. As a result, their relevance to clinical practice and their ability to predict meaningful health outcomes may be limited. 
Moreover, robust frameworks that integrate longitudinal health events across heterogeneous populations are still lacking [<xref ref-type="bibr" rid="ref15">15</xref>], further hampering translation into real-world practice.</p><p>This study addresses these gaps by proposing a transformer-based deep learning model for BA estimation that integrates both morbidity and mortality data from routine clinical practice. Unlike conventional approaches, our model is designed to jointly learn the reconstruction of clinical features, discrimination of health status, prediction of all-cause mortality, and semantic alignment of BA and CA. This yields a single, interpretable BA &#x2013; CA gap metric that reliably reflects both biological aging and prospective health risk.</p><p>We hypothesize that by leveraging morbidity and mortality data within a transformer-based deep learning framework, the model will provide BA estimates that more accurately reflect individual health status and better predict adverse outcomes than conventional approaches. To enhance accuracy and generalizability, our model was trained and validated using a large-scale, longitudinal health checkup dataset comprising more than 150,000 adults. We compared its performance with established BA estimation methods, focusing on the discrimination of morbidity status and prediction of future mortality.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Cohort</title><p>The study cohort included individuals who underwent health checkups at the Seoul National University Hospital Healthcare System Gangnam Center between 2003 and 2020. The dataset comprised initial visit records of 151,281 individuals who voluntarily participated in routine health examinations. 
Participants underwent these checkups, either as part of or in addition to examinations mandated by the Korean National Health Insurance Service, a single-payer system with near-universal coverage [<xref ref-type="bibr" rid="ref16">16</xref>].</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>The study protocol was approved by the Institutional Review Board (IRB) of Seoul National University Hospital (IRB number 2104-098-1211) and was conducted in accordance with the Declaration of Helsinki and applicable regulations [<xref ref-type="bibr" rid="ref17">17</xref>]. All data were retrospectively collected and fully anonymized by the institution&#x2019;s data management team using a secure coding process that removed direct identifiers (eg, names and resident registration numbers), thus ensuring participant confidentiality. As the analysis involved only deidentified data used for secondary research purposes, the IRB waived the requirement for informed consent, consistent with institutional guidelines. This study follows the STROBE (Strengthening the Reporting of Observational Studies in Epidemiology) guidelines [<xref ref-type="bibr" rid="ref18">18</xref>].</p></sec><sec id="s2-3"><title>Measurements</title><p>Comorbidities, including diabetes mellitus (DM), hypertension, dyslipidemia, cardiovascular disease, and cancer, were assessed through standardized questionnaires. Information regarding medication use for DM, hypertension, and dyslipidemia was also collected. Additional data were obtained from anthropometric measurements, blood tests, bioelectrical impedance analysis, and pulmonary function tests. Mortality data, linked to Statistics Korea, were analyzed from January 2004 to December 2020. 
Deaths due to external causes (<italic>International Classification of Diseases, Tenth Revision</italic> [<italic>ICD-10</italic>] codes: S00-S99, T00-T98, U01-U03, V01-V99, W00-W99, X00-X84, X85-Y05, Y08-Y09, Y10-Y36, and Y85-Y89) were excluded.</p></sec><sec id="s2-4"><title>Data Preprocessing</title><p>Features with &#x2265;50% missing data were excluded. Remaining missing values were imputed using the mean of observed values; we recognize that this approach could reduce variability and introduce bias, but it was chosen for pragmatic management of a large, complex dataset.</p></sec><sec id="s2-5"><title>Population Classification for BA Estimation</title><p>Participants were categorized into 3 health status groups: normal, predisease, and disease. Normal status was defined as having fasting glucose &#x003C;100 mg/dL and glycated hemoglobin (HbA<sub>1c</sub>) &#x003C;5.7%, systolic blood pressure (SBP) &#x003C;120 mm Hg and diastolic blood pressure (DBP) &#x003C;80 mm Hg, low-density lipoprotein cholesterol (LDL-C) &#x003C;100 mg/dL, triglycerides &#x003C;150 mg/dL, and high-density lipoprotein cholesterol (HDL-C) &#x2265;60 mg/dL. Predisease status included fasting glucose 100&#x2010;125 mg/dL or HbA<sub>1c</sub> 5.7%&#x2010;6.4% (ie, prediabetes), SBP 120&#x2010;139 mm Hg or DBP 80&#x2010;89 mm Hg (ie, elevated blood pressure), LDL-C 100&#x2010;159 mg/dL, triglycerides 150&#x2010;199 mg/dL, or HDL-C 40&#x2010;59 mg/dL (ie, borderline lipid levels). Disease status was defined as having fasting glucose &#x2265;126 mg/dL, HbA<sub>1c</sub> &#x2265;6.5%, or the use of antidiabetic medications (ie, DM); SBP &#x2265;140 mm Hg, DBP &#x2265;90 mm Hg, or the use of antihypertensive medications (ie, hypertension); or LDL-C &#x2265;160 mg/dL, triglycerides &#x2265;200 mg/dL, HDL-C &#x003C;40 mg/dL, or the use of lipid-lowering medications (ie, dyslipidemia). Missing data that had been imputed using mean values were excluded from the categorization process. 
Consequently, individuals with missing values for the relevant variables were classified into the normal category.</p><p>The population was further classified into four groups based on morbidity: (1) normal population, (2) normal and predisease population, (3) entire population, and (4) entire population excluding outliers (individuals within mean &#x00B1; 2 SDs of key variables, including HbA<sub>1c</sub>, SBP, DBP, LDL-C, triglycerides, HDL-C, and waist circumference).</p></sec><sec id="s2-6"><title>Selection of Feature Sets</title><p>We defined 7 health-related domains (anemia, adiposity, inflammation, kidney function, lung function, metabolism, and nutrition), each linked to a representative clinical phenotype. Feature selection was guided by expert opinion (JWY, JHB, YHK, and YMC) and correlation with CA. The relationships between features and CA were analyzed using 3 methods: Pearson correlation coefficient (PCC) for linear relationships, Spearman rank-order correlation coefficient (SROCC) for monotonic relationships, and mutual information (MI) for informational dependence. Analyses were performed for both the entire and normal populations. Features in the top 10% of PCC, SROCC, and MI relative to CA are shown in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Features with &#x2265;50% missing data were excluded; remaining missing values were imputed as described in the &#x201C;Data Preprocessing&#x201D; section.</p><p>Three feature sets were developed: a base set (13 features), a morbidity-related set (the base set plus fasting glucose, SBP, DBP, LDL-C, triglyceride, and HDL-C), and the entire set (88 features; Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The number of features used in modeling varied according to data division, with CA-derived features excluded. 
Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> summarizes the proportion of missing data for key features. Despite the capability of transformer architectures to process high-dimensional data, preliminary feature selection was conducted to reduce dimensionality, minimize noise, and promote stable convergence, thereby enhancing computational feasibility and interpretability.</p></sec><sec id="s2-7"><title>Modeling for BA Prediction</title><p>The model predicts BA by using 3 feature sets, simultaneously training for encoder-decoder reconstruction, CA prediction, and gap estimation. The model comprises 4 key components: CA prediction, contrastive learning for morbidity, maximizing correlation with mortality, and gap semantic consistency (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Overview of the model architecture of gap-based biological age estimation. The model predicts biological age (BA) from clinical feature sets using a unified encoder-decoder framework. Input data are embedded with special tokens for chronological age (CA) and the BA &#x2013; CA gap, along with mask and positional encodings to account for missing values and input structure. The encoder generates latent feature representations, which are used for both CA prediction and gap estimation. The decoder reconstructs the embedded input. The model is trained via a multitask loss that integrates CA prediction, contrastive learning for morbidity, maximizing correlation with mortality, and enforcing semantic consistency of the BA &#x2013; CA gap. 
Separate models are developed for men and women to account for sex differences.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e71592_fig01.png"/></fig><sec id="s2-7-1"><title>Embedding</title><p>Each clinical feature or questionnaire response was standardized and mapped to an embedding vector via a fully connected layer. Input data (<inline-formula><mml:math id="ieqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:mstyle></mml:math></inline-formula>) sequences included feature embeddings and 2 special tokens: <inline-formula><mml:math id="ieqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> (chronological age) and <inline-formula><mml:math id="ieqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> (the BA gap to be estimated). 
CA was normalized and embedded as the <inline-formula><mml:math id="ieqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> token; <inline-formula><mml:math id="ieqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> was initially a placeholder, updated during inference to predict the BA gap. Missing values were marked using an imputation mask (<inline-formula><mml:math id="ieqn6"><mml:mi>m</mml:mi></mml:math></inline-formula>), with 0 indicating original values and 1 indicating imputed values. The input embedding is formulated as:</p><disp-formula id="equWL1"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msup><mml:mi>X</mml:mi><mml:mrow><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>x</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>m</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>p</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math 
id="ieqn7"><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the feature embedding layer, <inline-formula><mml:math id="ieqn8"><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> the masking embedding, and <inline-formula><mml:math id="ieqn9"><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> the positional embedding.</p></sec><sec id="s2-7-2"><title>Encoder-Decoder</title><p>The Performer, a Transformer with linear self-attention, was used as the encoder-decoder. The encoder generated latent features for the <inline-formula><mml:math id="ieqn10"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn11"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> tokens, which the decoder used to reconstruct the embedded input. 
Reconstruction loss (<inline-formula><mml:math id="ieqn12"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) for <inline-formula><mml:math id="ieqn13"><mml:mi>K</mml:mi></mml:math></inline-formula> samples is calculated using the mean squared error:</p><disp-formula id="equWL2"><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mi>K</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>N</mml:mi><mml:mo>+</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mo>+</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mover><mml:mi>X</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi><mml:mi 
mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn14"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the embedded description of the <inline-formula><mml:math id="ieqn15"><mml:mi>j</mml:mi></mml:math></inline-formula>-th feature of the <inline-formula><mml:math id="ieqn16"><mml:mi>i</mml:mi></mml:math></inline-formula><italic>-</italic>th sample.</p></sec><sec id="s2-7-3"><title>CA Prediction</title><p>CA was predicted from the latent <inline-formula><mml:math id="ieqn17"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> token, processed through 2 fully connected layers. 
The loss combined mean squared error and <italic>R</italic><sup>2</sup> to improve accuracy and encourage CA-relevant encoding. Given the predicted <italic>&#x0177;</italic> and true CA <italic>y</italic>, the training loss <inline-formula><mml:math id="ieqn18"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula> is:</p><disp-formula id="E9"><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mtext>CA</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>K</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:msub><mml:mtext>CA</mml:mtext><mml:mrow><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mfrac><mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:mo 
stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mi>y</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula></sec><sec id="s2-7-4"><title>Gap Estimation</title><p>Rather than directly predicting BA, the model estimated the gap between BA and CA using the <inline-formula><mml:math id="ieqn19"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> token, modeled as a probability distribution and refined through multiple loss components. A negative BA &#x2013; CA gap indicates slower biological aging, while a positive gap reflects accelerated aging.</p><p><named-content content-type="indent">&#x2003;</named-content>1. 
Distribution alignment loss (<inline-formula><mml:math id="ieqn20"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) used the maximum mean discrepancy (MMD) to align the latent features (<inline-formula><mml:math id="ieqn21"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) with a standard normal distribution <inline-formula><mml:math id="ieqn22"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>:</p><disp-formula id="equWL4"><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">M</mml:mi><mml:mi mathvariant="normal">M</mml:mi><mml:mi mathvariant="normal">D</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>p</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi 
mathvariant="normal">P</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></disp-formula><list list-type="simple"><list-item><p>2. Consistency loss (<inline-formula><mml:math id="ieqn23"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) ensured independence between the latent <inline-formula><mml:math id="ieqn24"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn25"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> tokens:</p></list-item></list><disp-formula id="equWL5"><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi 
mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>K</mml:mi></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:msub><mml:mi mathvariant="normal">P</mml:mi><mml:mrow><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:msub><mml:mi mathvariant="normal">P</mml:mi><mml:mrow><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>Z</mml:mi><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi 
mathvariant="normal">P</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn26"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:msub><mml:mi mathvariant="normal">P</mml:mi><mml:mrow><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula> is the gap mean inference network, and <inline-formula><mml:math id="ieqn27"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msubsup><mml:mi>Z</mml:mi><mml:mrow><mml:mi mathvariant="normal">G</mml:mi><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msubsup></mml:mrow></mml:mstyle></mml:math></inline-formula> is a shuffled or alternative version for independence.</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>3. 
Contrastive loss for morbidity (<inline-formula><mml:math id="ieqn28"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) contrasted <inline-formula><mml:math id="ieqn29"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>  (original gap) and  <inline-formula><mml:math id="ieqn30"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula> (corrected gap) estimates:</p><disp-formula id="E10"><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mtext>contrast</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo movablelimits="true" 
form="prefix">max</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>K</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>&#x03B3;</mml:mi><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">r</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>with <inline-formula><mml:math id="ieqn31"><mml:mi>&#x03B3;</mml:mi></mml:math></inline-formula> as a tunable margin parameter.</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>4. 
Mortality loss (<inline-formula><mml:math id="ieqn32"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>) enforced a negative correlation between the predicted gap and observed time-to-death using PCC:</p><disp-formula id="equWL7"><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">_</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">_</mml:mi><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi mathvariant="normal">h</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The total loss (<inline-formula><mml:math id="ieqn33"><mml:mi mathvariant="fraktur">L</mml:mi></mml:math></inline-formula>) was a weighted sum of these components:</p><disp-formula id="equWL8"><mml:math id="eqn8"><mml:mstyle 
displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mi 
mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>6</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn34"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula> is the reconstruction loss, <inline-formula><mml:math id="ieqn35"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="fraktur">L</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula>  is the loss for CA prediction, and <inline-formula><mml:math id="ieqn36"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula> are hyperparameters controlling each loss term. 
The loss weight values were set to  <inline-formula><mml:math id="ieqn37"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn38"><mml:msub><mml:mrow><mml:mi>&#x03BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn39"><mml:msub><mml:mrow><mml:mi>&#x03BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.001</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn40"><mml:msub><mml:mrow><mml:mi>&#x03BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>4</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn41"><mml:msub><mml:mrow><mml:mi>&#x03BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>10</mml:mn></mml:math></inline-formula>, and <inline-formula><mml:math id="ieqn42"><mml:msub><mml:mrow><mml:mi>&#x03BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>6</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> for the experiments based on model performance on the subset for validation.</p></sec></sec><sec id="s2-8"><title>Model Implementation and Training</title><p>To account for sex differences, separate models were developed for women and men using PyTorch. Input data were embedded into 128-dimensional vectors. The Performer had 3 layers, 4 attention heads, and 1024 hidden units per feed-forward layer [<xref ref-type="bibr" rid="ref19">19</xref>]. 
The model was trained with the AdamP optimizer [<xref ref-type="bibr" rid="ref20">20</xref>], batch size of 2000, and learning rate of 1e&#x2212;3, for up to 1000 epochs, with early stopping if validation performance did not improve for 100 successive epochs. For comparison, baseline models were reimplemented following the publication of Bae et al [<xref ref-type="bibr" rid="ref21">21</xref>], using their specified hyperparameters. Features for Klemera and Doubal&#x2019;s method (KDM) [<xref ref-type="bibr" rid="ref22">22</xref>] were selected following the original methodology, as alternative feature sets yielded less stable results in preliminary analyses.</p><p>The full dataset was randomly split into training (70%), validation (15%), and test (15%) subsets, repeated 3 times with different seeds to create distinct splits. For each split, separate models were trained, and performance metrics were averaged for robustness. Mortality analyses were conducted on test sets, with 956 total samples available for survival analysis.</p></sec><sec id="s2-9"><title>Morbidity Discrimination Through 5-Year Averaged BA and CA Gaps</title><p>The average gaps between BA and CA were calculated over 5-year intervals to demonstrate improved disease discrimination by the proposed model, compared with existing models, which often yielded implausibly variable BAs (eg, &#x003C;&#x2212;10 or &#x003E;10).</p></sec><sec id="s2-10"><title>Statistics and Reproducibility</title><p>Continuous variables were reported as mean (SD), and categorical variables as counts and percentages. Student <italic>t</italic> tests compared 2 independent groups. PCC was used to assess linear relationships with CA, SROCC to assess monotonic relationships with CA, and MI to assess informational dependence on CA. Univariate and multivariate linear regressions for time-to-death were conducted using CA, estimated BA, or predicted gap as independent variables. 
Model performance was benchmarked using KDM [<xref ref-type="bibr" rid="ref22">22</xref>], CA cluster [<xref ref-type="bibr" rid="ref14">14</xref>], and deep neural network (DNN) [<xref ref-type="bibr" rid="ref21">21</xref>] models. Associations with time-to-death were assessed through regression slope, <italic>R</italic><sup>2</sup>, and PCC. For mortality prediction, participants were classified into healthy and unhealthy groups by gap values, and survival was analyzed with Kaplan-Meier curves and log-rank tests. All reported <italic>P</italic> values were 2-sided, with <italic>P</italic>&#x003C;.05 considered statistically significant. Analyses were performed using SciPy.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Study Participants</title><p>We analyzed health checkup data from 151,281 individuals (n=78,847, 52.1% men; n=72,434, 47.9% women), with mean ages of 41.4 (SD 12.1) years for men and 45.7 (SD 12.5) years for women. Participants were categorized at baseline as normal, predisease, and disease (<xref ref-type="table" rid="table1">Table 1</xref>). Predisease conditions (prediabetes, elevated blood pressure, or borderline lipid levels) were present in 32.4% (n=21,368) of men and 53.4% (n=30,274) of women, while overt diseases (DM, hypertension, or dyslipidemia) were observed in 66.8% (n=56,979) of men and 39.3% (n=38,002) of women. 
The cohort was classified into 4 morbidity-based groups: normal population, normal and predisease population, entire population, and entire population excluding outliers.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Baseline characteristics of study participants<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics</td><td align="left" valign="bottom">Men</td><td align="left" valign="bottom">Women</td></tr></thead><tbody><tr><td align="left" valign="top">Participants, n (%)</td><td align="left" valign="top">78,847 (100)</td><td align="left" valign="top">72,434 (100)</td></tr><tr><td align="left" valign="top">&#x2003;Age (years), mean (SD)</td><td align="left" valign="top">41.4 (12.1)</td><td align="left" valign="top">45.7 (12.5)</td></tr><tr><td align="left" valign="top">BMI, kg/m<sup>2</sup></td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Data, n (%)</td><td align="left" valign="top">76,431 (96.9)</td><td align="left" valign="top">70,762 (97.7)</td></tr><tr><td align="left" valign="top">&#x2003;Mean (SD)</td><td align="left" valign="top">24.6 (11.3)</td><td align="left" valign="top">21.9 (3.3)</td></tr><tr><td align="left" valign="top">Glycated hemoglobin, %</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Data, n (%)</td><td align="left" valign="top">73,470 (93.2)</td><td align="left" valign="top">68,649 (94.8)</td></tr><tr><td align="left" valign="top">&#x2003;Mean (SD)</td><td align="left" valign="top">5.7 (0.7)</td><td align="left" valign="top">5.6 (0.6)</td></tr><tr><td align="left" valign="top">Morbidity status, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Normal</td><td align="left" valign="top">500 
(0.8)</td><td align="left" valign="top">4,158 (7.3)</td></tr><tr><td align="left" valign="top">&#x2003;Predisease<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">21,368 (32.4)</td><td align="left" valign="top">30,274 (53.4)</td></tr><tr><td align="left" valign="top">&#x2003;Disease<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">56,979 (66.8)</td><td align="left" valign="top">38,002 (39.3)</td></tr><tr><td align="left" valign="top">Glycemic status, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Normal</td><td align="left" valign="top">30,997 (40.1)</td><td align="left" valign="top">38,818 (54.4)</td></tr><tr><td align="left" valign="top">&#x2003;Prediabetes</td><td align="left" valign="top">37,806 (49.0)</td><td align="left" valign="top">28,996 (40.6)</td></tr><tr><td align="left" valign="top">&#x2003;Diabetes mellitus</td><td align="left" valign="top">8394 (10.9)</td><td align="left" valign="top">3608 (5.0)</td></tr><tr><td align="left" valign="top">Blood pressure, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Normal</td><td align="left" valign="top">28,079 (36.6)</td><td align="left" valign="top">46,068 (64.8)</td></tr><tr><td align="left" valign="top">&#x2003;Elevated blood pressure</td><td align="left" valign="top">26,529 (34.5)</td><td align="left" valign="top">14,615 (20.6)</td></tr><tr><td align="left" valign="top">&#x2003;Hypertension</td><td align="left" valign="top">22,167 (28.9)</td><td align="left" valign="top">10,380 (14.6)</td></tr><tr><td align="left" valign="top">Lipids, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Normal</td><td align="left" valign="top">1781 (2.3)</td><td align="left" valign="top">7348 (10.3)</td></tr><tr><td align="left" 
valign="top">&#x2003;Borderline lipid levels</td><td align="left" valign="top">47,039 (61.2)</td><td align="left" valign="top">52,384 (73.7)</td></tr><tr><td align="left" valign="top">&#x2003;Dyslipidemia</td><td align="left" valign="top">27,994 (36.5)</td><td align="left" valign="top">11,407 (16.0)</td></tr><tr><td align="left" valign="top">Mortality</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Number of events, n (%)</td><td align="left" valign="top">2240 (2.8)</td><td align="left" valign="top">1106 (1.5)</td></tr><tr><td align="left" valign="top">&#x2003;Time-to-death (days), mean (SD)</td><td align="left" valign="top">2794 (1696)</td><td align="left" valign="top">2665 (1689)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Data are presented as mean (SD) or n (%).</p></fn><fn id="table1fn2"><p><sup>b</sup>Predisease is defined as prediabetes, elevated blood pressure, or borderline lipid levels. </p></fn><fn id="table1fn3"><p><sup>c</sup>Disease is defined as diabetes mellitus, hypertension, or dyslipidemia.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Discrimination of Health Status Using Gap Values</title><p>Our proposed model and established comparators, including the KDM, CA cluster, and DNN models, were trained to discriminate health status based on the gap between BA and CA. 
Negative gap values (<inline-formula><mml:math id="ieqn43"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mi mathvariant="normal">B</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>&#x003C;</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>) indicate better health, while positive gap values (<inline-formula><mml:math id="ieqn44"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mi mathvariant="normal">B</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">A</mml:mi></mml:mrow><mml:mo>&#x003E;</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula>) indicate poorer health.</p><p>BA estimates were consistently higher in all groups when algorithms were trained on the normal population, resulting in positively skewed gap values (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). In contrast, models trained on other populations showed negative gaps in the normal group and positive gaps in the disease group, regardless of feature set (Tables S5 and S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). When trained using the entire feature set on the entire population, the model exhibited the most reasonable trends: negative BA &#x2013; CA gaps for normal individuals and positive gaps for disease groups, with graded distributions across health status categories (<xref ref-type="fig" rid="figure2">Figure 2A-C</xref> and Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
The proposed model outperformed existing models, particularly among men (<xref ref-type="fig" rid="figure3">Figures 3A-D</xref> and <xref ref-type="fig" rid="figure4">4A-D</xref>). The gap between BA and CA remained distinct across different health statuses, even in in-distribution test data (<xref ref-type="fig" rid="figure5">Figure 5A and B</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Discrimination of morbidity status by the gap-based model and conventional approaches. Panels show the mean gap between BA and CA with 95% CIs for normal, predisease, and disease groups, using 4 models: KDM (red), CAC (blue), DNN, and the gap-based model (purple). Each panel represents a different feature set: (A) base, (B) morbidity-related, and (C) entire feature set. Within each panel, results are shown separately for men (upper rows) and women (lower rows). The vertical dashed line at zero represents the equivalence of BA and CA. The gap-based model consistently shows negative BA &#x2013; CA gaps for normal and positive gaps for disease groups, reflecting a clear separation of morbidity status across both sexes and all feature sets. BA: biological age; CA: chronological age; CAC: chronological age cluster; DNN: deep neural network; KDM: Klemera and Doubal&#x2019;s method.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e71592_fig02.png"/></fig><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Averaged gaps between BA and CA by health status in men. The mean BA &#x2013; CA gap is plotted across 5-year CA intervals from normal (dotted line), predisease (solid green), and disease (dashed red) groups. Each panel shows results from a different model: (A) KDM, (B) CAC, (C) DNN, and (D) gap-based model. All models were trained on the entire population using the entire feature set. 
The gap-based model demonstrates consistent and clinically interpretable separation among health status groups, with normal individuals showing persistently more negative BA &#x2013; CA gaps across all ages and disease groups showing higher values. BA: biological age; CA: chronological age; CAC: chronological age cluster; DNN: deep neural network; KDM: Klemera and Doubal&#x2019;s method.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e71592_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Averaged gaps between BA and CA by health status in women. The mean BA &#x2013; CA gap is plotted across 5-year CA intervals from normal (dotted line), predisease (solid green), and disease (dashed red) groups. Each panel shows results from a different model: (A) KDM, (B) CAC, (C) DNN, and (D) gap-based model. All models were trained on the entire population using the entire feature set. Among the models, the gap-based model most clearly separates health status groups across age intervals. BA: biological age; CA: chronological age; CAC: chronological age cluster; DNN: deep neural network; KDM: Klemera and Doubal&#x2019;s method.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e71592_fig04.png"/></fig><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Distributions of the gap between BA and CA in normal and disease groups. Box-and-whisker plots show the distribution of gaps between BA and CA across 4 models: KDM, CAC, DNN, and gap-based model. (A) Results for men. (B) Results for women. All models were trained on the entire population using the entire feature set. For each model, the box indicates the IQR, the line inside the box denotes the median, and whiskers represent 1.5 times the IQR. Outliers are shown as circles. 
Significant differences between groups (<sup>*</sup><italic>P</italic>&#x003C;.001) are indicated. Among the models, the gap-based model demonstrates the most pronounced separation between normal and disease groups, consistently showing lower BA &#x2013; CA gaps for normal individuals and higher values for those with disease in both men and women. BA: biological age; CA: chronological age; CAC: chronological age cluster; DNN: deep neural network; KDM: Klemera and Doubal&#x2019;s method.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e71592_fig05.png"/></fig><p><xref ref-type="fig" rid="figure3">Figures 3</xref> and <xref ref-type="fig" rid="figure4">4</xref> demonstrate that normal status is most common among individuals aged 20&#x2010;40 years. This reflects the cohort&#x2019;s characteristics, as most older participants presented with at least 1 borderline or overt chronic condition, while younger participants (younger than 20 years) often had incomplete data or laboratory abnormalities classifying them as a predisease group. Thus, the observed distribution results from strict health criteria and underlying population characteristics, not from a lack of healthy individuals outside this age range.</p><p>The BA &#x2013; CA gap robustly reflected the burden of major chronic diseases across multiple domains. Individuals with prediabetes or diabetes exhibited higher gaps than those with normal glycemic status, indicating that even early glucose dysregulation is associated with accelerated biological aging (Table S8 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Similarly, BA &#x2013; CA gaps shifted from negative to positive values as blood pressure or lipid levels progressed from normal to overt disease (Tables S9 and S10 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
The presence of cardiovascular disease or cancer was also associated with significantly increased BA &#x2013; CA gaps (Tables S11 and S12 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). These patterns were consistent in both women and men. Collectively, the model&#x2019;s gap serves as a sensitive and clinically relevant marker, integrating the cumulative impact of metabolic risk factors and chronic disease burden on aging.</p></sec><sec id="s3-3"><title>Mortality Prediction Using Gap Values</title><p>We evaluated the predictive value of the BA &#x2013; CA gap for all-cause mortality using time-to-death data. Linear regression analyses demonstrated that the gap-based approach provided more accurate mortality predictions than the BA metric alone (Table S13 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Mortality analyses were performed on the test sets from 3 independent data splits, totaling 956 samples (334, 306, and 316 for each split). Participants were categorized as healthy (gap &#x003C;&#x2212;1), unhealthy (gap&#x003E;1), or reference group (gap between &#x2212;1 and 1). The proposed model outperformed all existing models in mortality prediction (Table S13 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Kaplan-Meier survival curves confirmed the model&#x2019;s effectiveness in distinguishing healthy, unhealthy, and reference groups among men (<xref ref-type="fig" rid="figure6">Figure 6A-D</xref>). Although survival curve differences did not reach statistical significance among women, trends were consistent: unhealthy individuals exhibited decreased survival, while healthy individuals showed an upward survival trajectory (<xref ref-type="fig" rid="figure7">Figure 7A-D</xref>). 
In contrast, existing models did not effectively discriminate survival outcomes by health status in either men or women.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>All-cause mortality according to groups by the gap between BA and CA in men. Kaplan-Meier survival curves show all-cause mortality according to the gap between BA and CA in men with death events, using 4 models: (A) KDM, (B) CAC, (C) DNN, and (D) gap-based model. Participants were classified as healthy (gap &#x003C; &#x2212;1, dotted blue), unhealthy (gap &#x003E; 1, dashed red), or reference (&#x2212;1 &#x2264; gap &#x2264; 1, solid green). The number at risk in each group is indicated below each plot. In the gap-based model, both the unhealthy and reference groups show significantly higher mortality risk than the healthy group (<italic>*P</italic>&#x003C;.001 vs healthy; <italic>**P</italic>=.001 vs reference; log-rank test). BA: biological age; CA: chronological age; CAC: chronological age cluster; DNN: deep neural network; KDM: Klemera and Doubal&#x2019;s method.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e71592_fig06.png"/></fig><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>All-cause mortality according to groups by the gap between BA and CA in women. Kaplan-Meier survival curves show all-cause mortality according to the gap between BA and CA in women with death events, using 4 models: (A) KDM, (B) CAC, (C) DNN, and (D) gap-based model. Participants were classified as healthy (gap &#x003C; &#x2212;1, dotted blue), unhealthy (gap &#x003E; 1, dashed red), or reference (&#x2212;1 &#x2264; gap &#x2264; 1, solid green). The number at risk in each group is indicated below each plot. In the gap-based model, the unhealthy group showed a trend toward higher mortality risk than the healthy group (<italic>*P</italic>=.07 vs healthy; log-rank test). 
BA: biological age; CA: chronological age; CAC: chronological age cluster; DNN: deep neural network; KDM: Klemera and Doubal&#x2019;s method.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e71592_fig07.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study introduces a novel deep learning framework for estimating BA by integrating morbidity and mortality information with comprehensive health checkup data. Our transformer-based, multiobjective model, which calculates the gap between BA and CA, outperformed existing BA prediction methods in discriminating disease status and predicting mortality risk. Notably, the BA &#x2013; CA gap derived from our model showed a stronger correlation with time-to-death than conventional BA estimates, underscoring its potential clinical relevance.</p><p>BA estimating methods that heavily rely on CA limit their precision, particularly in populations with heterogeneous health profiles [<xref ref-type="bibr" rid="ref2">2</xref>]. In contrast, our approach leverages the BA &#x2013; CA gap and a diverse array of clinical features, resulting in a more individualized assessment of biological aging. Existing DNN models have struggled to accurately reflect morbidity, highlighting the advantages of our gap-based, transformer architecture, which jointly optimizes morbidity classification, mortality prediction, and feature reconstruction. Unlike conventional models, such as the KDM, which focus on physiological biomarkers, our approach incorporates explicit morbidity and mortality data, enabling more comprehensive modeling of the aging process. 
In multivariate analyses, our gap-based model demonstrated that existing models fail to fully account for health status independently of CA.</p><p>Our BA &#x2013; CA gap metric serves as an integrated marker, encompassing both health status and mortality risk, and thus provides a unified summary of an individual&#x2019;s biological aging trajectory. Multitask training enables the model to embed complex relationships among morbidity, mortality, and aging, producing an interpretable and actionable output. This integration supports the use of the BA &#x2013; CA gap as a comprehensive biomarker for aging.</p><p>The choice of training population is crucial in AI-based health modeling. We found that restricting training to a strictly defined normal population, despite its small sample size, skewed gap values and failed to reflect broader population patterns. Conversely, training on more diverse populations improved discrimination of morbidity status. Although we tested models across different feature sets, those using the full feature set performed best in mortality prediction, even when morbidity prediction performance was similar.</p><p>Although mortality prediction is a common task in deep learning research, few previous models for BA estimation have explicitly incorporated both morbidity and mortality as learning objectives [<xref ref-type="bibr" rid="ref23">23</xref>]. While CA data are readily available, labeled outcomes for health events or direct BA phenotypes are insufficient, especially in longitudinal or community-based cohorts [<xref ref-type="bibr" rid="ref24">24</xref>]. In this context, unsupervised and self-supervised learning methods are particularly advantageous, as they can extract meaningful biological aging signals despite limited labeled data. 
These approaches offer both adaptability and scalability, making them well-suited for aging research where outcome labels are often unavailable [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>].</p><p>Our findings suggest that a transformer-based BA &#x2013; CA gap model incorporating morbidity and mortality data more effectively stratifies health status and predicts mortality risk than existing models. However, further prospective and multicenter studies are needed to establish its generalizability and clinical use. Implementation of such AI-based tools must also address ethical considerations, including data privacy, equitable access, and algorithmic transparency.</p></sec><sec id="s4-2"><title>Limitations</title><p>This study has several limitations. First, the study design does not allow for causal inference regarding the effects of specific comorbidities on biological aging. Future studies or analyses using causal modeling will be necessary to clarify these relationships. Second, our cohort consisted of individuals who voluntarily underwent routine health checkups at a single tertiary academic institution in Korea. This population may not represent the broader community, introducing selection bias and limiting the generalizability of our findings. External validation in larger, more diverse, and population-based cohorts, including different ethnic, demographic, and socioeconomic backgrounds, will be essential. Third, comorbidities were primarily identified through self-reported questionnaires, which can be subject to recall bias and misclassification. Although we attempted to mitigate this by cross-referencing with medication records and laboratory data, residual inaccuracies may persist. Fourth, while most key features had low missingness, we used mean imputation for missing data to ensure transparency and computational efficiency. 
This approach, however, may introduce bias and reduce variability, potentially affecting model performance. Our model incorporated a missingness mask to help address this issue, but more robust imputation methods should be explored. Fifth, classifying individuals with missing data on key features as normal may have introduced classification bias, resulting in potential misclassification of participants with unrecognized predisease or disease. This could impact the accuracy of health status stratification and model evaluation. Finally, our analysis relied exclusively on health checkup data. Integrating additional data sources, including genomics, proteomics, or digital health metrics, could further enhance the predictive performance, biological relevance, and clinical applicability of BA estimation models.</p></sec><sec id="s4-3"><title>Conclusions</title><p>We developed a deep learning model that estimates BA by integrating morbidity and mortality data within a unified framework, using unsupervised and self-supervised learning. This BA &#x2013; CA gap&#x2013;based approach serves as a sensitive, interpretable biomarker of biological aging and holds promise for advancing personalized health management.</p></sec></sec></body><back><ack><p>This work was supported by Daewoong Pharmaceutical Co, Ltd, Seoul, Republic of Korea. The funder had no role in study conceptualization, design, data collection, analysis, interpretation, or manuscript preparation.</p></ack><notes><sec><title>Data Availability</title><p>The source code for all model components and preprocessing steps will be made publicly available in a web-based repository upon publication. Participant-level data will not be shared to protect confidentiality; however, all scripts, model architectures, and documentation required for reproducibility will be provided.</p></sec></notes><fn-group><fn fn-type="con"><p>SEM, JWY, and YMC conceived and designed the study. 
JWY, JHB, YHK, HY, and YMC provided supervision and strategic oversight. JWY and YHK were responsible for data collection and curation. SEM, JWY, JHB, YHK, and YMC developed the methodology. SEM implemented the deep learning model and performed the experiments. SEM, JWY, JHB, SJ, and SY analyzed and interpreted the results. SEM and JHB drafted the initial manuscript. SEM, JWY, JHB, BHL, and YMC critically reviewed and revised the manuscript. All authors had access to the raw data, with SEM, SJ, SY, and HY having full access to the experimental datasets. The decision to submit the manuscript was approved by all authors. SEM, JWY, and JHB contributed equally as first authors. HY and YMC contributed equally as corresponding authors.</p></fn><fn fn-type="conflict"><p>Seoul National University and NAVER have pending patents related to this work. JWY, YHK, and YMC are employees of Seoul National University Hospital. YMC serves as an independent director of Daewoong Pharmaceutical Co, Ltd. SEM, SJ, and HY are employees and shareholders of NAVER. 
All other authors declare no conflicts of interest.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">BA</term><def><p>biological age</p></def></def-item><def-item><term id="abb3">CA</term><def><p>chronological age</p></def></def-item><def-item><term id="abb4">DBP</term><def><p>diastolic blood pressure</p></def></def-item><def-item><term id="abb5">DM</term><def><p>diabetes mellitus</p></def></def-item><def-item><term id="abb6">DNN</term><def><p>deep neural network</p></def></def-item><def-item><term id="abb7">HbA<sub>1c</sub></term><def><p>glycated hemoglobin</p></def></def-item><def-item><term id="abb8">HDL-C</term><def><p>high-density lipoprotein cholesterol</p></def></def-item><def-item><term id="abb9"><italic>ICD-10</italic></term><def><p><italic>International Classification of Diseases, Tenth Revision</italic></p></def></def-item><def-item><term id="abb10">IRB</term><def><p>institutional review board</p></def></def-item><def-item><term id="abb11">KDM</term><def><p>Klemera and Doubal&#x2019;s method</p></def></def-item><def-item><term id="abb12">LDL-C</term><def><p>low-density lipoprotein cholesterol</p></def></def-item><def-item><term id="abb13">MI</term><def><p>mutual information</p></def></def-item><def-item><term id="abb14">PCC</term><def><p>Pearson correlation coefficient</p></def></def-item><def-item><term id="abb15">SBP</term><def><p>systolic blood pressure</p></def></def-item><def-item><term id="abb16">SROCC</term><def><p>Spearman rank-order correlation coefficient</p></def></def-item><def-item><term id="abb17">STROBE</term><def><p>Strengthening the Reporting of Observational Studies in Epidemiology</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Hamczyk</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Nevado</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Barettino</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fuster</surname><given-names>V</given-names> </name><name name-style="western"><surname>Andr&#x00E9;s</surname><given-names>V</given-names> </name></person-group><article-title>Biological versus chronological aging: JACC focus seminar</article-title><source>J Am Coll Cardiol</source><year>2020</year><month>03</month><day>3</day><volume>75</volume><issue>8</issue><fpage>919</fpage><lpage>930</lpage><pub-id pub-id-type="doi">10.1016/j.jacc.2019.11.062</pub-id><pub-id pub-id-type="medline">32130928</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tian</surname><given-names>YE</given-names> </name><name name-style="western"><surname>Cropley</surname><given-names>V</given-names> </name><name name-style="western"><surname>Maier</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Lautenschlager</surname><given-names>NT</given-names> </name><name name-style="western"><surname>Breakspear</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zalesky</surname><given-names>A</given-names> </name></person-group><article-title>Heterogeneous aging across multiple organ systems and prediction of chronic disease and mortality</article-title><source>Nat Med</source><year>2023</year><month>05</month><volume>29</volume><issue>5</issue><fpage>1221</fpage><lpage>1231</lpage><pub-id pub-id-type="doi">10.1038/s41591-023-02296-6</pub-id><pub-id pub-id-type="medline">37024597</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Melzer</surname><given-names>D</given-names> </name><name name-style="western"><surname>Pilling</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Ferrucci</surname><given-names>L</given-names> </name></person-group><article-title>The genetics of human ageing</article-title><source>Nat Rev Genet</source><year>2020</year><month>02</month><volume>21</volume><issue>2</issue><fpage>88</fpage><lpage>101</lpage><pub-id pub-id-type="doi">10.1038/s41576-019-0183-6</pub-id><pub-id pub-id-type="medline">31690828</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tian</surname><given-names>YE</given-names> </name><name name-style="western"><surname>Skampardoni</surname><given-names>I</given-names> </name><etal/></person-group><article-title>The genetic architecture of biological age in nine human organ systems</article-title><source>Nat Aging</source><year>2024</year><month>09</month><volume>4</volume><issue>9</issue><fpage>1290</fpage><lpage>1307</lpage><pub-id pub-id-type="doi">10.1038/s43587-024-00662-8</pub-id><pub-id pub-id-type="medline">38942983</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sabayan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Doyle</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rost</surname><given-names>NS</given-names> </name><name name-style="western"><surname>Sorond</surname><given-names>FA</given-names> </name><name name-style="western"><surname>Lakshminarayan</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Launer</surname><given-names>LJ</given-names> </name></person-group><article-title>The role of population-level preventive care for brain health in ageing</article-title><source>Lancet Healthy Longev</source><year>2023</year><month>06</month><volume>4</volume><issue>6</issue><fpage>e274</fpage><lpage>e283</lpage><pub-id pub-id-type="doi">10.1016/S2666-7568(23)00051-X</pub-id><pub-id pub-id-type="medline">37201543</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elliott</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Caspi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Houts</surname><given-names>RM</given-names> </name><etal/></person-group><article-title>Disparities in the pace of biological aging among midlife adults of the same chronological age have implications for future frailty risk and policy</article-title><source>Nat Aging</source><year>2021</year><month>03</month><volume>1</volume><issue>3</issue><fpage>295</fpage><lpage>308</lpage><pub-id pub-id-type="doi">10.1038/s43587-021-00044-4</pub-id><pub-id pub-id-type="medline">33796868</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Crane</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Wilkinson</surname><given-names>G</given-names> </name><name name-style="western"><surname>Teare</surname><given-names>H</given-names> </name></person-group><article-title>Healthspan versus lifespan: new medicines to close the gap</article-title><source>Nat Aging</source><year>2022</year><month>11</month><volume>2</volume><issue>11</issue><fpage>984</fpage><lpage>988</lpage><pub-id pub-id-type="doi">10.1038/s43587-022-00318-5</pub-id><pub-id 
pub-id-type="medline">37118086</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rutledge</surname><given-names>J</given-names> </name><name name-style="western"><surname>Oh</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wyss-Coray</surname><given-names>T</given-names> </name></person-group><article-title>Measuring biological age using omics data</article-title><source>Nat Rev Genet</source><year>2022</year><month>12</month><volume>23</volume><issue>12</issue><fpage>715</fpage><lpage>727</lpage><pub-id pub-id-type="doi">10.1038/s41576-022-00511-7</pub-id><pub-id pub-id-type="medline">35715611</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ashiqur Rahman</surname><given-names>S</given-names> </name><name name-style="western"><surname>Giacobbi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Pyles</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mullett</surname><given-names>C</given-names> </name><name name-style="western"><surname>Doretto</surname><given-names>G</given-names> </name><name name-style="western"><surname>Adjeroh</surname><given-names>DA</given-names> </name></person-group><article-title>Deep learning for biological age estimation</article-title><source>Brief Bioinform</source><year>2021</year><month>03</month><day>22</day><volume>22</volume><issue>2</issue><fpage>1767</fpage><lpage>1781</lpage><pub-id pub-id-type="doi">10.1093/bib/bbaa021</pub-id><pub-id pub-id-type="medline">32363395</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bafei</surname><given-names>SEC</given-names> 
</name><name name-style="western"><surname>Shen</surname><given-names>C</given-names> </name></person-group><article-title>Biomarkers selection and mathematical modeling in biological age estimation</article-title><source>NPJ Aging</source><year>2023</year><month>07</month><day>1</day><volume>9</volume><issue>1</issue><fpage>13</fpage><pub-id pub-id-type="doi">10.1038/s41514-023-00110-8</pub-id><pub-id pub-id-type="medline">37393295</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chua</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>D</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Tackling prediction uncertainty in machine learning for healthcare</article-title><source>Nat Biomed Eng</source><year>2023</year><month>06</month><volume>7</volume><issue>6</issue><fpage>711</fpage><lpage>718</lpage><pub-id pub-id-type="doi">10.1038/s41551-022-00988-x</pub-id><pub-id pub-id-type="medline">36581695</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhavoronkov</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bischof</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>KF</given-names> </name></person-group><article-title>Artificial intelligence in longevity medicine</article-title><source>Nat Aging</source><year>2021</year><month>01</month><volume>1</volume><issue>1</issue><fpage>5</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1038/s43587-020-00020-4</pub-id><pub-id pub-id-type="medline">37118000</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qiu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kaeberlein</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SI</given-names> </name></person-group><article-title>ExplaiNAble BioLogical Age (ENABL Age): an artificial intelligence framework for interpretable biological age</article-title><source>Lancet Healthy Longev</source><year>2023</year><month>12</month><volume>4</volume><issue>12</issue><fpage>e711</fpage><lpage>e723</lpage><pub-id pub-id-type="doi">10.1016/S2666-7568(23)00189-7</pub-id><pub-id pub-id-type="medline">37944549</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rahman</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Adjeroh</surname><given-names>DA</given-names> </name></person-group><article-title>Centroid of age neighborhoods: a new approach to estimate biological age</article-title><source>IEEE J Biomed Health Inform</source><year>2020</year><month>04</month><volume>24</volume><issue>4</issue><fpage>1226</fpage><lpage>1234</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2019.2930938</pub-id><pub-id pub-id-type="medline">31352357</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Ploner</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Longitudinal trajectories, correlations and mortality 
associations of nine biological ages across 20-years follow-up</article-title><source>Elife</source><year>2020</year><month>02</month><day>11</day><volume>9</volume><fpage>e51507</fpage><pub-id pub-id-type="doi">10.7554/eLife.51507</pub-id><pub-id pub-id-type="medline">32041686</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Han</surname><given-names>K</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SH</given-names> </name></person-group><article-title>Current trends of big data research using the Korean National Health Information Database</article-title><source>Diabetes Metab J</source><year>2022</year><month>07</month><volume>46</volume><issue>4</issue><fpage>552</fpage><lpage>563</lpage><pub-id pub-id-type="doi">10.4093/dmj.2022.0193</pub-id><pub-id pub-id-type="medline">35929173</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>World Medical Association</collab></person-group><article-title>World Medical Association Declaration of Helsinki: ethical principles for medical research involving human participants</article-title><source>JAMA</source><year>2025</year><month>01</month><day>7</day><volume>333</volume><issue>1</issue><fpage>71</fpage><lpage>74</lpage><pub-id pub-id-type="doi">10.1001/jama.2024.21972</pub-id><pub-id pub-id-type="medline">39425955</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elm</surname><given-names>E von</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><name 
name-style="western"><surname>Egger</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pocock</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>G&#x00F8;tzsche</surname><given-names>PC</given-names> </name><name name-style="western"><surname>Vandenbroucke</surname><given-names>JP</given-names> </name></person-group><article-title>Strengthening the Reporting of Observational Studies in Epidemiology (STROBE) statement: guidelines for reporting observational studies</article-title><source>BMJ</source><year>2007</year><month>10</month><day>20</day><volume>335</volume><issue>7624</issue><fpage>806</fpage><lpage>808</lpage><pub-id pub-id-type="doi">10.1136/bmj.39335.541782.AD</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Choromanski</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Likhosherstov</surname><given-names>V</given-names> </name><name name-style="western"><surname>Dohan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Song</surname><given-names>X</given-names> </name><name name-style="western"><surname>Gane</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sarlos</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Rethinking attention with performers</article-title><conf-name>International Conference on Learning Representations</conf-name><conf-date>May 3-7, 2021</conf-date><conf-loc>Vienna, Austria</conf-loc></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Heo</surname><given-names>B</given-names> </name><name name-style="western"><surname>Chun</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Oh</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Han</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yun</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>G</given-names> </name><etal/></person-group><article-title>AdamP: slowing down the slowdown for momentum optimizers on scale-invariant weights</article-title><conf-name>International Conference on Learning Representations</conf-name><conf-date>Apr 26-30, 2020</conf-date><conf-loc>Addis Ababa, Ethiopia</conf-loc></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bae</surname><given-names>CY</given-names> </name><name name-style="western"><surname>Im</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Comparison of biological age prediction models using clinical biomarkers commonly measured in clinical practice settings: AI techniques vs. 
traditional statistical methods</article-title><source>Front Anal Sci</source><year>2021</year><volume>1</volume><fpage>1</fpage><lpage>12</lpage><pub-id pub-id-type="doi">10.3389/frans.2021.709589</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Klemera</surname><given-names>P</given-names> </name><name name-style="western"><surname>Doubal</surname><given-names>S</given-names> </name></person-group><article-title>A new approach to the concept and computation of biological age</article-title><source>Mech Ageing Dev</source><year>2006</year><month>03</month><volume>127</volume><issue>3</issue><fpage>240</fpage><lpage>248</lpage><pub-id pub-id-type="doi">10.1016/j.mad.2005.10.004</pub-id><pub-id pub-id-type="medline">16318865</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tian</surname><given-names>S</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Accelerated biological aging elevates the risk of cardiometabolic multimorbidity and mortality</article-title><source>Nat Cardiovasc Res</source><year>2024</year><volume>3</volume><issue>3</issue><fpage>332</fpage><lpage>342</lpage><pub-id pub-id-type="doi">10.1038/s44161-024-00438-8</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rani</surname><given-names>V</given-names> </name><name name-style="western"><surname>Nabi</surname><given-names>ST</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Mittal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>K</given-names> </name></person-group><article-title>Self-supervised learning: a succinct review</article-title><source>Arch Comput Methods Eng</source><year>2023</year><volume>30</volume><issue>4</issue><fpage>2761</fpage><lpage>2775</lpage><pub-id pub-id-type="doi">10.1007/s11831-023-09884-2</pub-id><pub-id pub-id-type="medline">36713767</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Therneau</surname><given-names>TM</given-names> </name><etal/></person-group><article-title>Unsupervised machine learning for the discovery of latent disease clusters and patient subgroups using electronic health records</article-title><source>J Biomed Inform</source><year>2020</year><month>02</month><volume>102</volume><fpage>103364</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2019.103364</pub-id><pub-id pub-id-type="medline">31891765</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Azizi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Culp</surname><given-names>L</given-names> </name><name name-style="western"><surname>Freyberg</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Robust and data-efficient generalization of self-supervised machine learning for diagnostic imaging</article-title><source>Nat Biomed Eng</source><year>2023</year><month>06</month><volume>7</volume><issue>6</issue><fpage>756</fpage><lpage>779</lpage><pub-id 
pub-id-type="doi">10.1038/s41551-023-01049-7</pub-id><pub-id pub-id-type="medline">37291435</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary tables presenting feature sets, proportions of missing data, model comparisons, disease-specific analyses, and regression analyses.</p><media xlink:href="jmir_v27i1e71592_app1.docx" xlink:title="DOCX File, 71 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>STROBE checklist.</p><media xlink:href="jmir_v27i1e71592_app2.docx" xlink:title="DOCX File, 41 KB"/></supplementary-material></app-group></back></article>