<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v27i1e65708</article-id><article-id pub-id-type="doi">10.2196/65708</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Federated Learning-Based Model for Predicting Mortality: Systematic Review and Meta-Analysis</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Tahir</surname><given-names>Nurfaidah</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Jung</surname><given-names>Chau-Ren</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Shin-Da</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Azizah</surname><given-names>Nur</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Ho</surname><given-names>Wen-Chao</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Li</surname><given-names>Tsai-Chung</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Public Health, College of Public Health, China Medical University</institution><addr-line>No. 100, Section 1, Jingmao Road, Beitun District</addr-line><addr-line>Taichung</addr-line><country>Taiwan</country></aff><aff id="aff2"><institution>Department of Industrial Engineering, Hasanuddin University</institution><addr-line>Makassar</addr-line><country>Indonesia</country></aff><aff id="aff3"><institution>Japan Environment and Children&#x2019;s Study Programme Office, National Institute for Environmental Studies</institution><addr-line>Tsukuba</addr-line><country>Japan</country></aff><aff id="aff4"><institution>Department of Physical Therapy, Healthcare Science Program, China Medical University</institution><addr-line>Taichung</addr-line><country>Taiwan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Schwartz</surname><given-names>Amy</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Chenxu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Prakash</surname><given-names>Satya</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Yue</surname><given-names>Shi-Jun</given-names></name></contrib><contrib 
contrib-type="reviewer"><name name-style="western"><surname>Arasteh</surname><given-names>Soroosh Tayebi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Wen-Chao Ho, PhD, Department of Public Health, College of Public Health, China Medical University, No. 100, Section 1, Jingmao Road, Beitun District, Taichung, 406040, Taiwan, 886 422053366 ext 6117; <email>whocmu@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>21</day><month>7</month><year>2025</year></pub-date><volume>27</volume><elocation-id>e65708</elocation-id><history><date date-type="received"><day>23</day><month>08</month><year>2024</year></date><date date-type="rev-recd"><day>06</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>13</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Nurfaidah Tahir, Chau-Ren Jung, Shin-Da Lee, Nur Azizah, Wen-Chao Ho, Tsai-Chung Li. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 21.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e65708"/><abstract><sec><title>Background</title><p>The rise of federated learning (FL) as a novel privacy-preserving technology offers the potential to create models collaboratively in a decentralized manner to address confidentiality issues, particularly regarding data privacy. However, there is a scarcity of clear and comprehensive evidence that compares the performance of FL with that of the established centralized machine learning (CML) in the clinical domain.</p></sec><sec><title>Objective</title><p>This study aimed to review the performance comparisons of FL-based and CML models for mortality prediction in clinical settings.</p></sec><sec sec-type="methods"><title>Methods</title><p>Experimental studies comparing the performance of FL and CML in predicting mortality were selected. Articles were excluded if they did not compare FL with CML or only compared the effectiveness of different FL baseline models. Two independent reviewers performed the screening, data extraction, and risk of bias assessment. The IEEE Xplore, PubMed, ScienceDirect, and Web of Science databases were searched for articles published up to June 2024. The risk of bias was assessed using CHARMS (Checklist for Critical Appraisal and Data Extraction for Systematic Reviews of Prediction Modeling Studies) and PROBAST (Prediction Model Risk of Bias Assessment Tool). 
Meta-analyses of the pooled area under the receiver operating curve (AUROC)/area under the curve (AUC) were performed for within-group comparisons (before and after federation).</p></sec><sec sec-type="results"><title>Results</title><p>Nine articles with heterogeneous framework design, scenario, and clinical context were included: 4 articles focused on specific case types; 3 articles were conducted in intensive care unit settings; and 2 articles in emergency departments, urgent centers, or trauma centers. Cohort datasets involving 1,412,973 participants were used in all of the included studies. These studies universally indicated that the predictive performance of FL models is comparable to that of CML. The pooled AUC for the FL and CML performances were 0.81 (95% CI 0.76&#x2010;0.85; <italic>I</italic><sup>2</sup>=78.36%) and 0.82 (95% CI 0.77&#x2010;0.86; <italic>I</italic><sup>2</sup>=72.33%), respectively. The Higgins <italic>I</italic><sup>2</sup> test indicated high heterogeneity between the included studies (<italic>I</italic><sup>2</sup>&#x2265;50%). In total, 4 out of 9 (44%) of the developed models were identified as having a high risk of bias.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This systematic review and meta-analysis demonstrate that FL can achieve similar performance to CML while conquering privacy risks in predicting mortality across various settings. 
Owing to the small number of studies and a moderate proportion of the high risk of bias, the effect estimates might be imprecise.</p></sec><sec><title>Trial Registration</title><p>PROSPERO International Prospective Register of Systematic Reviews CRD42024539245; <ext-link ext-link-type="uri" xlink:href="https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=539245">https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=539245</ext-link></p></sec></abstract><kwd-group><kwd>federated learning</kwd><kwd>centralized machine learning</kwd><kwd>mortality prediction</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Predicting mortality is essential in medicine, and numerous tools have been developed for clinical settings. The accurate prediction of mortality enables health care providers to manage treatment planning and resource allocation [<xref ref-type="bibr" rid="ref1">1</xref>]. Estimating the likelihood of mortality at the end of an intensive care unit (ICU) stay or within a designated timeframe is an effective means of prioritizing care by optimizing staff and equipment use. In addition, such a prediction model improves personalized treatment, especially for individuals facing terminal illness, by identifying patients who may benefit from receiving palliative care, allowing the care plans to align with patient prognoses and preferences [<xref ref-type="bibr" rid="ref2">2</xref>].</p><p>The emergence of machine learning (ML), as a subset of artificial intelligence, has contributed to the development of computational thinking. ML empowers computers to &#x201C;learn&#x201D; from training data and augment their knowledge without the need for explicit programming. ML algorithms can identify patterns from data and use this knowledge to generate predictions. Thus, ML models and algorithms can acquire knowledge based on experience. 
Integrating ML models to assess mortality risk in clinical workflows enables real-time monitoring, which allows physicians to stratify patients according to their severity and rapidly respond to changes in patient states [<xref ref-type="bibr" rid="ref3">3</xref>]. Despite the potential benefits of ML models in tailoring clinical interventions, hospitals typically have limited local data available to create reliable models [<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>Sharing additional datasets from various health care facilities can significantly enhance the performance and generalizability of ML models [<xref ref-type="bibr" rid="ref5">5</xref>]. This underscores the critical role of data sharing in the advancement of high-performance predictive models in clinical environments. However, within the health care sector, it is common for hospitals to isolate their datasets, often justifying this practice with legitimate privacy concerns while developing an internal model [<xref ref-type="bibr" rid="ref6">6</xref>]. Despite the belief of hospitals in the benefits of data sharing, conducting analysis in a centralized manner, which necessitates the consolidation of datasets from all participating hospitals or centers, heightens the risks associated with data privacy and security, as sensitive information is now disseminated to external entities. Furthermore, the transfer of datasets to a centralized repository, whether through physical means or network channels, creates additional vulnerability for potential data breaches [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. In addition to privacy and security challenges, the administrative burden of orchestrating data sharing is significant, as each participant typically adheres to its own regulations concerning data use and ownership [<xref ref-type="bibr" rid="ref8">8</xref>]. 
Consequently, a methodology that facilitates collaborative modeling in a decentralized framework, eliminating the requirement to aggregate all datasets in a single location, would significantly enhance the feasibility of multicenter studies.</p><p>Federated learning (FL) has emerged as a novel privacy-preserving technology that offers the potential to create models collaboratively in a decentralized manner to address confidentiality issues, particularly in terms of data privacy. FL was introduced by Google in 2016 [<xref ref-type="bibr" rid="ref9">9</xref>]. The architecture of FL aims to eliminate data exchange between participants. As a collaboratively distributed or decentralized ML privacy-preserving technology, FL eliminates the need to transfer data from the nodes to a central server. The principle of FL, or client-based architecture, enables multiple institutions to collaborate, wherein the baseline model is hosted by a coordinating node and computational nodes download the model and train it on local datasets. FL attempts to formulate models from various datasets and merges knowledge into a globally trained model, which increases the model&#x2019;s efficiency. It thereby offers viable solutions for investigating medical conditions [<xref ref-type="bibr" rid="ref10">10</xref>], particularly those with low prevalence or minimal data, helping to prevent inadequate care resulting from misrepresentation or underrepresentation of certain patient groups [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Despite its numerous benefits, FL has not yet been extensively implemented in clinical settings, and initiatives aimed at enhancing clinical translation are currently in progress. In addition, as a new emerging technology focused on privacy preservation, investigations into the robustness of FL within multiple clinical fields, along with comparative studies against established ML methodologies, are ongoing. 
In recent years, only a few studies have reviewed the potential benefits of FL in the clinical environment [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]; to the best of our knowledge, comparisons between FL and centralized machine learning (CML) approaches in performing clinical tasks have not yet been quantitatively assessed. Thus, we performed a systematic review and meta-analysis to examine the performance of the FL approach in comparison with single-center&#x2013;based CML in predicting mortality, evaluate the barriers to widespread clinical adoption, and provide insights into future directions of FL-related research in the health care domain. In this context, this study attempts to answer the following question: &#x201C;What is the feasibility and capability of an FL approach for predicting mortality compared with a CML-based model?&#x201D;</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Design</title><p>This systematic review and meta-analysis were conducted in accordance with the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidance [<xref ref-type="bibr" rid="ref14">14</xref>] (<xref ref-type="supplementary-material" rid="app6">Checklist 1</xref>).</p></sec><sec id="s2-2"><title>Eligibility Criteria</title><p>Articles published after 2016 were eligible for inclusion if they investigated research evidence in a clinical context. Articles were included if they quantitatively compared FL and CML models in predicting mortality (before and after federated within-group comparisons) in terms of area under the receiver operating curve (AUROC)/area under the curve (AUC). The eligible study designs included experimental studies that compared the performances of FL and CML. 
The outcome of interest was mortality prediction.</p><p>Articles were excluded if they did not compare FL with CML or only compared the effectiveness of FL performance in different FL baseline models. The excluded studies consisted of protocols, reviews, studies using only qualitative methods, opinion pieces, and conference abstracts without linked full-text articles. Articles were also excluded if they evaluated the model performance with evaluation metrics other than the AUROC/AUC and if they were not available in English.</p></sec><sec id="s2-3"><title>Information Sources</title><p>A search was conducted in 4 multidisciplinary databases (IEEE Xplore, PubMed, ScienceDirect, and Web of Science) using EndNote 20 software (Clarivate). The date of the last search was June 23, 2024. Manual searches of the reference lists, citations, and related articles of the included studies were performed to identify additional studies that were missed in the original electronic searches.</p></sec><sec id="s2-4"><title>Search Strategy</title><p>The controlled free-text terms were used through Boolean operators (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). All original studies that developed an ML mortality prediction model were included if they met the predefined inclusion criteria:</p><list list-type="bullet"><list-item><p>Population: Patients in different clinical settings (eg, ICU, emergency department [ED], trauma centers, or specific disease admission).</p></list-item><list-item><p>Intervention: FL model.</p></list-item><list-item><p>Comparator: CML model.</p></list-item><list-item><p>Outcomes: Mortality prediction.</p></list-item></list></sec><sec id="s2-5"><title>Selection Process</title><p>Records from the electronic and citation searches were exported to EndNote Online for deduplication, followed by title, abstract, and full-text screening. 
One reviewer (NT) extracted the data from all identified studies using a predefined data extraction form, and a second reviewer (NA) checked the data for accuracy. Any disagreements between the reviewers were resolved by consensus. If the reviewers could not reach a consensus, a third reviewer (the corresponding author) made the final decision.</p></sec><sec id="s2-6"><title>Data Collection</title><p>Data from the included studies were independently extracted by 2 reviewers (see Selection Process section) using a data extraction form developed a priori. The accuracy of data extraction was confirmed by comparing the extraction forms and returning to the original article to resolve any disparities.</p></sec><sec id="s2-7"><title>Data Items</title><p>The variables collected were study characteristics, including the data source, number and description of participants, predictors, model development approach, and model performance comparison. For the outcome of interest, the AUROC/AUC, variance, and sample sizes were extracted for each comparison. When these data were missing, they were calculated from other reported statistics using recommended methods [<xref ref-type="bibr" rid="ref15">15</xref>] where possible. For studies that reported multiple outcome measures, only the outcome of interest (mortality prediction) was collected.</p></sec><sec id="s2-8"><title>Risk of Bias Assessment</title><p>Two reviewers independently assessed the risk of bias using the PROBAST (Prediction Model Risk of Bias Assessment Tool) [<xref ref-type="bibr" rid="ref16">16</xref>]. Disagreements were resolved through discussion with other researchers. The PROBAST includes 20 signaling questions across 4 key domains (participants, predictors, outcome, and analysis), and each domain is assessed for a low, high, or unclear risk of bias. 
The CHARMS (Checklist for Critical Appraisal and Data Extraction for Systematic Reviews of Prediction Modeling Studies) was also examined in conjunction with the PROBAST tool [<xref ref-type="bibr" rid="ref17">17</xref>]. The PROBAST tool, its considerations, and related publications are available on the PROBAST website [<xref ref-type="bibr" rid="ref18">18</xref>]. <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> provides a summary of the criteria used to determine the risk of bias.</p></sec><sec id="s2-9"><title>Data Synthesis</title><p>The included studies were summarized narratively in text, tables, and figures. The discriminant ability, namely the capability to distinguish surviving patients and death events, of the prediction model was extracted (ranging from 0.5 [no discriminative ability] to 1 [perfect discriminative ability]) [<xref ref-type="bibr" rid="ref15">15</xref>]. Owing to the lack of calibration plots and summaries of calibration, the agreement between the frequency of observed events and the predicted probabilities was not assessed. Prognostic prediction models with effect sizes (AUROCs) for the same outcome were synthesized and analyzed using the metafor package in R (version 4.3.3; R Core Team, R Foundation for Statistical Computing). As the included studies typically differ in design and execution (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]), variations in their results are unlikely to occur by chance only. Thus, standard errors were estimated based on a normal distribution assumption. 
In addition, the presence of heterogeneity was considered, and the summary result with its 95% CI, which quantified the average performance across studies, was assessed by implementing a random, rather than fixed-effect, meta-analysis model [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. We evaluated the heterogeneity between the included studies using the Higgins <italic>I</italic><sup>2</sup> test (<italic>I</italic><sup>2</sup>&#x2264;25% for low, <italic>I</italic><sup>2</sup>&#x003C;50% for moderate, and <italic>I</italic><sup>2</sup>&#x2265;50% for high) (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> [<xref ref-type="bibr" rid="ref24">24</xref>]).</p></sec><sec id="s2-10"><title>Certainty Assessment</title><p>The performance of FL and CML for each outcome was evaluated using the C statistic. When measures of uncertainty were not reported, we approximated the standard error of the C statistic using the appropriate and suggested measurements (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> [<xref ref-type="bibr" rid="ref24">24</xref>]).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>In total, 1228 records were identified, 29 full-text reports were screened, and 9 articles were included (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flow diagram illustrating the process of the study selection for a systematic review on the federated learning&#x2013;based model for predicting mortality, detailing a total of 1228 records identified (1201 excluded), 29 full-text reports screened (20 excluded), and 9 articles included. 
AUC: area under the curve; AUROC: area under the receiver operating curve; CML: centralized machine learning; ICU: intensive care unit; PTE: pulmonary thromboendarterectomy.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e65708_fig01.png"/></fig><sec id="s3-1"><title>Included Studies</title><p>Study selection was performed in 3 stages. In the first stage, 1228 studies were screened for duplication using the EndNote 20 software. In the second stage, potentially relevant studies were assessed by comparing the titles and abstracts (n=1178) against the predetermined inclusion criteria. In the third stage, studies (n=27) that appeared to meet the inclusion criteria and articles (n=2) that were sought from the citations were obtained for detailed assessment.</p><p>Among the 29 studies that were identified and assessed for eligibility, 20 were excluded because they had no comparison with CML or only included other FL-based models as the comparator group, used evaluation metrics other than the AUROC/AUC value, or were review articles or retracted articles. The characteristics and respective references of all the included studies are presented in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. Five studies were conducted in the United States, 3 in Asia, and 1 in Europe. Of the 9 studies, 8 were retrospective cohort studies using institutional data sources [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. The minimum prediction window was 24 hours and the maximum was 30 days. The median sample size was 28,000 (minimum: 3055; maximum: 1,222,554). Prediction models were developed in all studies using internal validation. 
The most common ML techniques for the reported models were neural networks (6/9, 67%) [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>] and logistic regression (3/9, 33%) [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>].</p></sec><sec id="s3-2"><title>Risk of Bias</title><p>In total, 5 studies were rated as having a low risk of bias [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>], while 4 out of 9 of the developed models [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>] were identified as having a high risk of bias (<xref ref-type="fig" rid="figure2">Figure 2</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]; <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]). One study [<xref ref-type="bibr" rid="ref20">20</xref>] did not provide information related to the preprocessing step; therefore, the data quality assessment was unclear, and this study was rated as having an unclear risk of bias in the participants&#x2019; domain (<xref ref-type="fig" rid="figure2">Figure 2</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]). The most common reasons for the risk of bias included insufficient information regarding the number of missing data, handling of missing values, and complexities in the data. 
In terms of applicability, information regarding adherence to the TRIPOD (Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis) statement was reported in only one study [<xref ref-type="bibr" rid="ref27">27</xref>]. However, the outcome (in this case, mortality) predicted in all developed models matched the review question. Therefore, all of the included studies could be judged as having a low risk for applicability.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Risk of bias assessment [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e65708_fig02.png"/></fig></sec><sec id="s3-3"><title>FL and Mortality Prediction: State-of-the-Art</title><p>Descriptions of the included studies are provided in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. Budrionis et al [<xref ref-type="bibr" rid="ref19">19</xref>] developed a feed-forward neural network combined with a recurrent neural network as the baseline ML model to predict mortality among patients in ICU. Their results demonstrated that the performance of the FL model was comparable to that of the CML in terms of the <italic>F</italic><sub>1</sub>-score and AUROC. However, the FL model training and inference required approximately 9 and 40 times longer, respectively, than the equivalent tasks that were executed in centralized settings. 
Using the same publicly available ICU dataset and deep learning-based approach, Randl et al [<xref ref-type="bibr" rid="ref24">24</xref>] consistently demonstrated the comparable performance between FL and CML models in different schemes and with different numbers of clients.</p><p>Using the same ML-based model approach, logistic regression, 3 studies consistently showed that FL-based models outperformed centralized models [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Li et al [<xref ref-type="bibr" rid="ref22">22</xref>] incorporated 10 simulated sites from a tertiary hospital in Singapore by implementing a scoring-based system (the FedScore model) to facilitate cross-institutional collaborations to predict mortality within 30 days after ED visits. Similarly, FL models outperformed CML in predicting the mortality of hospitalized patients with COVID-19 and pulmonary thromboendarterectomy using a real-world dataset [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Pfitzner et al [<xref ref-type="bibr" rid="ref23">23</xref>] used a neural network-based model for predicting patient mortality and revision surgery after visceral operations, demonstrating that the FL model performed better than CML in terms of the AUROC.</p><p>Three studies showed that the centralized model still performs well when compared with FL with subtle distinctions. All 3 studies developed neural network-based prediction models. Huang et al [<xref ref-type="bibr" rid="ref20">20</xref>] introduced a community-based FL algorithm, where distributed data were clustered into clinically meaningful communities based on similar diagnoses and geographical locations. Their evaluation showed that the community-based FL predictive performance was not substantially dissimilar from CML in predicting mortality in an ICU setting. 
The CML model also performed better than FL, although not significantly so, when predicting mortality in an emergency setting [<xref ref-type="bibr" rid="ref21">21</xref>] and among patients with COVID-19 [<xref ref-type="bibr" rid="ref25">25</xref>]. In the study by Shiri et al [<xref ref-type="bibr" rid="ref25">25</xref>], the mean AUCs of 0.82 (95% CI 0.79&#x2010;0.85) and 0.81 (95% CI 0.77&#x2010;0.84) were achieved by the centralized and FL models, respectively. However, the DeLong test indicated that the differences were not statistically significant (<italic>P</italic>=.98).</p></sec><sec id="s3-4"><title>Predictive Performance</title><p>Most studies used more than one evaluation metric to describe the performance of the developed models (eg, AUROC or AUC, sensitivity or recall, specificity, precision, accuracy, area under the precision-recall curve, and <italic>F</italic><sub>1</sub>-score; <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]). The calibration performance was not reported in any of the included studies. The pooled AUC with a 95% CI and heterogeneity indices for the FL and CML performance were 0.81 (95% CI 0.76&#x2010;0.85; <italic>I</italic><sup>2</sup>=78.36%) and 0.82 (95% CI 0.77&#x2010;0.86; <italic>I</italic><sup>2</sup>=72.33%), respectively (<xref ref-type="fig" rid="figure3">Figure 3</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]). 
The performance of FL was similar to that of CML in its ability to predict mortality in various clinical settings.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Pooled area under the curve (AUC) of federated learning (FL) and centralized machine learning (CML) [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref27">27</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e65708_fig03.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>This study reveals the current state of research related to the implementation of the FL approach in health care. More than half (5/9, 56%) of the studies were found in medical journals, which suggests that the clinical use of FL in the medical domain is increasingly gaining popularity. Compared with the nonprivate CML, the FL-based models exhibited sufficient discrimination ability in predicting mortality when operated across various clinical settings (ie, ICU, ED, and specific disease). There is evidence that FL-based models performed similarly [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref24">24</xref>] to or better than CML [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>] when developed using different predictors (eg, laboratory values, vital signs) in distinct schemes (ie, configuration setting, scoring-based) and prediction windows (ie, 24 h, 48 h, 72 h, 96 h, 7 days, and 30 days). This performance persists despite the given cost and complexity associated with node orchestration, privacy preservation, and extra steps that do not exist in centralized approaches. 
The pooled AUC values for FL and CML were 0.81 (95% CI 0.76&#x2010;0.85) and 0.82 (95% CI 0.77&#x2010;0.86), respectively, proving the feasibility of FL implementation in a health care setting given the high imbalance and nonindependent and identically distributed (non-IID) nature of the clinical dataset.</p><p>All of the included studies reported findings in favor of the FL approach. The performance of the FL model was not affected by the number of computational nodes or data distribution across the nodes, which concurred with the existing literature on the implementation of FL [<xref ref-type="bibr" rid="ref13">13</xref>]. However, there was a considerable complication effect of FL algorithms in terms of the training time, inference duration, and communication rounds. The training and prediction times of the FL model were reported to be 9 and 40 times longer than those of the CML model, respectively [<xref ref-type="bibr" rid="ref19">19</xref>]. This finding is supported by a prior study that revealed the effect of FL infrastructure computational overheads in increasing the inference time duration [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Although the inference duration seems relatively high compared with that of CML, the previous work suggested that in real-life deployments, predictions are likely to be made for individual patients rather than large patient groups. Therefore, longer times for making predictions are likely negligible [<xref ref-type="bibr" rid="ref28">28</xref>]. The communication rounds, which indicated the learning speed of the model, were reported to be 57 times slower in FL than in CML [<xref ref-type="bibr" rid="ref20">20</xref>]; that is, the FL algorithm required higher communication costs between the hospital and server to achieve convergence. 
The iterative nature of the FL model, in which each round must ensure that all relevant updates are incorporated into the global model for convergence, has been explored extensively in previous experimental studies. However, it has been suggested that the robustness of the FL performance was not affected [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>].</p><p>Although no study has quantitatively summarized the discriminative ability of FL compared with CML in clinical settings, it has been qualitatively demonstrated that FL models enhance the generalizability and analysis power while mitigating privacy risks [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Consistent with this finding, our meta-analysis found insubstantial differences in the pooled AUC between the FL and CML for predicting mortality in various scenarios. Similar to a previous systematic review [<xref ref-type="bibr" rid="ref13">13</xref>], we found that most of the developed FL models used a neural network approach, whereas the remaining minority used logistic regression. This may correlate with the data type used in the current FL research, where neural networks have been shown to provide excellent performance. In addition, the large variety of data types, both structured and unstructured, that have been successfully used in FL models is encouraging.</p><p>The FL models were not trained experimentally using structured data only. The use of radiological data was reported by Shiri et al [<xref ref-type="bibr" rid="ref25">25</xref>]. In the field of medical imaging, data annotation is a crucial and labor-intensive task. Through the incorporation of the FL-based model, different institutions can benefit from each other&#x2019;s annotations without even sharing them. 
Training deep learning algorithms requires high computational power and memory space. The use of the FL model offers the promise to enhance efficiency in training and memory consumption for AI-assisted medical image analysis algorithms [<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>Previous studies have reported that the global FL model is more robust and achieves greater accuracy at individual sites than models trained solely on local data for predicting mortality [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. This improvement is likely attributable to the availability of larger and more diverse datasets, the use of input data that can be standardized, and the avoidance of clinical impressions or reported symptoms. These factors collectively enhance the benefits of the FL approach and its impact on performance, generalizability, and ultimately, the model&#x2019;s usability in the clinical domain. For client sites with relatively small datasets, 2 common approaches could be used for fitting a useful model: one is to train locally with its own data, and the other is to apply a model trained on a larger dataset [<xref ref-type="bibr" rid="ref32">32</xref>]. 
Our findings indicate that the global FL model can improve on the accuracy of the locally trained model in predicting mortality in ED and non-ED admissions even when the number of patients was relatively small with a low percentage of desirable outcomes [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref27">27</xref>], indicating that the benefit for client sites with small datasets arising from participation in FL collaborations is substantial.</p><p>Consistent with a prior study, the FL model was shown to have the ability to capture more diversity than local training and to mitigate the bias present in models trained on a homogeneous population [<xref ref-type="bibr" rid="ref32">32</xref>]. In clinical domains, however, data are frequently formed at the hospital or institution level, making local models feasible in these cases. Under these circumstances, the generalizability and stability of global models relative to local models become more crucial. Li et al [<xref ref-type="bibr" rid="ref22">22</xref>] showed that by a cotraining process via FL, a global model prediction framework such as FedScore can achieve less variation than locally developed ones while still maintaining good performance. This benefit of FL is promising for medical research that seeks dependable high-risk decision-making.</p><p>Although FL mitigates privacy risks by design, certain attacks, such as membership and property inference attacks, are still possible [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. However, in line with previous studies [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref37">37</xref>], we found that implementing differential privacy (DP) to mitigate inference attacks in the FL model remains challenging. 
Although DP adds an extra layer of privacy protection, a trade-off exists between privacy, accuracy, and model fairness in FL with DP [<xref ref-type="bibr" rid="ref35">35</xref>]. We found that strong privacy protection can be provided at the cost of performance degradation [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref25">25</xref>].</p><p>FL faces significant challenges due to data heterogeneity, which refers to the nonuniform distribution of data across participating clients. This heterogeneity arises from differences in data types, feature distributions, and class imbalances. We found that both evenly distributed and non-IID datasets used for developing FL-based models were reported. A previous study found that non-IID data can significantly reduce the model accuracy, which is explained by the weight divergence between local and global distributions [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Consistent with the existing literature [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], the use of hierarchical local clustering to improve convergence and accuracy was also supported by our findings [<xref ref-type="bibr" rid="ref20">20</xref>]. A previous study reported that clustering patients with common features into the same community and learning separate models for individual communities not only enhances the predictive accuracy in fewer communication rounds but also allows for the interpretability of the prediction results [<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>In line with the previous literature [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], most implementations of the prediction model development were performed using a retrospective cohort, extracted from a publicly available dataset, rather than a clinical study design. 
According to our findings, most of the included studies were in the development phase, in which the models were tested and optimized without external validation. Thus, significant development is still required to improve the maturity of technologies during the conceptualization, development, and application stages. In addition, the models must be tested using real-time data. Additional development is also necessary to introduce the models to the clinical workflow, evaluate clinical outcomes, and integrate the models into the hospital environment.</p><p>The included studies demonstrated sufficient discrimination ability, which is a prerequisite for clinical acceptance [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. However, prior to this, external validation within a clinical workflow must be established. In future research, it will be crucial to ascertain whether the model encompasses both treated and untreated patients and how the treatment effects are handled in the models. Furthermore, the establishment of a real-time data infrastructure is imperative for effectively coping with unknowns.</p><p>Offering global collaboration, predictive power, and privacy preservation through FL, medical institutions worldwide could share insights and collectively train predictive models for treatment strategies while safeguarding patient information. FL&#x2019;s decentralized nature allowed for real-time data analysis and rapid response. Health care professionals and researchers could continuously update and refine models as new data became available, leading to more accurate predictions and recommendations. Additionally, FL facilitated global collaboration, enabling experts from various regions to pool their data effectively. This collaborative approach is instrumental in improving care strategies, as researchers worldwide could collectively analyze clinical trial data without compromising data privacy. 
Overall, FL emerged as a crucial tool that enables privacy-preserving data collaboration, real-time analysis, and global cooperation among health care professionals and researchers.</p></sec><sec id="s4-2"><title>Strengths and Limitations</title><p>The key strengths of this systematic review are the quantitative meta-analytical methods that allowed for robust conclusions based on cumulative evidence regarding the feasibility of FL approaches for clinical implementation. Focusing on the pooled AUC, which is a well-established metric for evaluating the discriminatory ability of predictive models, this study provides a clear and quantifiable measure of how well each model predicts mortality risk, thereby facilitating straightforward comparisons. By aggregating data from multiple studies in different clinical settings, this meta-analysis has captured a wide range of clinical environments and patient populations to provide clinicians with reliable information on which predictive modeling approach may be more effective in their specific settings. In addition, this study offers more precise estimates of the model performance to aid in identifying subtle differences between the FL and CML approaches owing to the involvement of over a million study participants.</p><p>This study has several limitations. First, gray literature was not included. In addition, meta-regression was not conducted because of the small number of studies. Second, the high risk of bias most often originated in the analysis domain owing to values not being reported or the inappropriate handling of missing values, as well as methods for dealing with data complexities not being reported. In addition, differences in the predictors, prediction windows, study characteristics, and clinical settings were potential sources of heterogeneity among the included studies. Moreover, these studies included only adult patients. 
This may pose challenges to generalizability and fairness when applied to a broader population. Finally, the calibration performance was not evaluated because of the lack of studies. The lack of calibration in FL models can severely limit their applicability across different populations.</p></sec><sec id="s4-3"><title>Conclusions</title><p>In conclusion, FL-based models can achieve a performance similar to that of centralized models trained on pooled data while preserving data privacy in predicting mortality across various clinical settings. This study demonstrates the feasibility of using FL models to construct a risk prediction model for mortality prediction while addressing data privacy concerns, which is helpful for clinical practice. However, the included studies only performed an internal validation of the data, and researchers should be encouraged to perform and report external validation of the available models. The former type of studies often overestimated the true predictive performance. Future research directions include a repetition of this review to keep up with the rapidly growing use of FL-based models in the clinical environment, and further evaluation and exploration of how FL is performed in different groups of patients and specific cases to assess research evidence.</p></sec></sec></body><back><ack><p>We would like to express our gratitude to the Ministry of Education for providing the scholarship that supported this research.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated and analyzed during this study will be available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>NT was responsible for the conceptualization, methodology, software development, data curation, formal analysis, visualization, writing of the original draft, and review and editing of the manuscript. 
CRJ contributed to the conceptualization, software development, formal analysis, and review and editing of the manuscript. SDL was involved in the conceptualization, methodology, and review and editing of the manuscript. NA contributed to formal analysis, visualization, and review and editing. WCH was responsible for conceptualization, data curation, methodology, supervision, and review and editing of the manuscript. TCL contributed to methodology, formal analysis, and supervision.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb2">AUROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">CHARMS</term><def><p>Checklist for Critical Appraisal and Data Extraction for Systematic Reviews of Prediction Modeling Studies</p></def></def-item><def-item><term id="abb4">CML</term><def><p>centralized machine learning</p></def></def-item><def-item><term id="abb5">DP</term><def><p>differential privacy</p></def></def-item><def-item><term id="abb6">ED</term><def><p>emergency department</p></def></def-item><def-item><term id="abb8">FL</term><def><p>federated learning</p></def></def-item><def-item><term id="abb9">ICU</term><def><p>intensive care unit</p></def></def-item><def-item><term id="abb10">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb11">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb12">PROBAST</term><def><p>Prediction Model Risk of Bias Assessment Tool</p></def></def-item><def-item><term id="abb13">TRIPOD</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or 
Diagnosis</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Johnson</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Pollard</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Naumann</surname><given-names>T</given-names> </name></person-group><article-title>Generalizability of predictive models for intensive care unit patients</article-title><comment>Preprint posted online on 2018</comment><pub-id pub-id-type="doi">10.48550/arXiv.1812.02275</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krasowski</surname><given-names>A</given-names> </name><name name-style="western"><surname>Krois</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kuhlmey</surname><given-names>A</given-names> </name><name name-style="western"><surname>Meyer-Lueckel</surname><given-names>H</given-names> </name><name name-style="western"><surname>Schwendicke</surname><given-names>F</given-names> </name></person-group><article-title>Predicting mortality in the very old: a machine learning analysis on claims data</article-title><source>Sci Rep</source><year>2022</year><month>10</month><day>19</day><volume>12</volume><issue>1</issue><fpage>17464</fpage><pub-id pub-id-type="doi">10.1038/s41598-022-21373-3</pub-id><pub-id pub-id-type="medline">36261581</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wardhana</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wibowo</surname><given-names>J</given-names> </name></person-group><article-title>Predicting mortality in 
burn patients: literature review of risk factors for burn mortality and application in scoring systems</article-title><source>Ann Burns Fire Disasters</source><year>2023</year><month>03</month><volume>36</volume><issue>1</issue><fpage>3</fpage><lpage>10</lpage><pub-id pub-id-type="medline">38680910</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moshawrab</surname><given-names>M</given-names> </name><name name-style="western"><surname>Adda</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bouzouane</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ibrahim</surname><given-names>H</given-names> </name><name name-style="western"><surname>Raad</surname><given-names>A</given-names> </name></person-group><article-title>Reviewing federated machine learning and its use in diseases prediction</article-title><source>Sensors (Basel)</source><year>2023</year><month>02</month><day>13</day><volume>23</volume><issue>4</issue><fpage>2112</fpage><pub-id pub-id-type="doi">10.3390/s23042112</pub-id><pub-id pub-id-type="medline">36850717</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maddox</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Rumsfeld</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Payne</surname><given-names>PRO</given-names> </name></person-group><article-title>Questions for artificial intelligence in health care</article-title><source>JAMA</source><year>2019</year><month>01</month><day>1</day><volume>321</volume><issue>1</issue><fpage>31</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1001/jama.2018.18932</pub-id><pub-id pub-id-type="medline">30535130</pub-id></nlm-citation></ref><ref 
id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>R</given-names> </name><name name-style="western"><surname>Srivastava</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sahu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tiwari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ambasta</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>P</given-names> </name></person-group><article-title>Artificial intelligence to deep learning: machine intelligence approach for drug discovery</article-title><source>Mol Divers</source><year>2021</year><month>08</month><volume>25</volume><issue>3</issue><fpage>1315</fpage><lpage>1360</lpage><pub-id pub-id-type="doi">10.1007/s11030-021-10217-3</pub-id><pub-id pub-id-type="medline">33844136</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jochems</surname><given-names>A</given-names> </name><name name-style="western"><surname>Deist</surname><given-names>TM</given-names> </name><name name-style="western"><surname>El Naqa</surname><given-names>I</given-names> </name><etal/></person-group><article-title>Developing and validating a survival prediction model for NSCLC patients through distributed learning across 3 countries</article-title><source>Int J Radiat Oncol Biol Phys</source><year>2017</year><month>10</month><day>1</day><volume>99</volume><issue>2</issue><fpage>344</fpage><lpage>352</lpage><pub-id pub-id-type="doi">10.1016/j.ijrobp.2017.04.021</pub-id><pub-id pub-id-type="medline">28871984</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Benedetto</surname><given-names>U</given-names> </name><name name-style="western"><surname>Dimagli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sinha</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Machine learning improves mortality risk prediction after cardiac surgery: systematic review and meta-analysis</article-title><source>J Thorac Cardiovasc Surg</source><year>2022</year><month>06</month><volume>163</volume><issue>6</issue><fpage>2075</fpage><lpage>2087</lpage><pub-id pub-id-type="doi">10.1016/j.jtcvs.2020.07.105</pub-id><pub-id pub-id-type="medline">32900480</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Diniz</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Vasconcelos</surname><given-names>H</given-names> </name><name name-style="western"><surname>Souza</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rb-Silva</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ameijeiras-Rodriguez</surname><given-names>C</given-names> </name><name name-style="western"><surname>Freitas</surname><given-names>A</given-names> </name></person-group><article-title>Comparing decentralized learning methods for health data models to nondecentralized alternatives: protocol for a systematic review</article-title><source>JMIR Res Protoc</source><year>2023</year><month>06</month><day>19</day><volume>12</volume><fpage>e45823</fpage><pub-id pub-id-type="doi">10.2196/45823</pub-id><pub-id pub-id-type="medline">37335606</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Watson</surname><given-names>OJ</given-names> </name><name 
name-style="western"><surname>Barnsley</surname><given-names>G</given-names> </name><name name-style="western"><surname>Toor</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hogan</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Winskill</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ghani</surname><given-names>AC</given-names> </name></person-group><article-title>Global impact of the first year of COVID-19 vaccination: a mathematical modelling study</article-title><source>Lancet Infect Dis</source><year>2022</year><month>09</month><volume>22</volume><issue>9</issue><fpage>1293</fpage><lpage>1302</lpage><pub-id pub-id-type="doi">10.1016/S1473-3099(22)00320-6</pub-id><pub-id pub-id-type="medline">35753318</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ku</surname><given-names>E</given-names> </name><name name-style="western"><surname>Amaral</surname><given-names>S</given-names> </name><name name-style="western"><surname>McCulloch</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Adey</surname><given-names>DB</given-names> </name><name name-style="western"><surname>Li</surname><given-names>L</given-names> </name><name name-style="western"><surname>Johansen</surname><given-names>KL</given-names> </name></person-group><article-title>Comparison of 2021 CKD-EPI equations for estimating racial differences in preemptive waitlisting for kidney transplantation</article-title><source>Clin J Am Soc Nephrol</source><year>2022</year><month>10</month><volume>17</volume><issue>10</issue><fpage>1515</fpage><lpage>1521</lpage><pub-id pub-id-type="doi">10.2215/CJN.04850422</pub-id><pub-id pub-id-type="medline">36122938</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>R</given-names> </name><name name-style="western"><surname>Romano</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Moore</surname><given-names>JH</given-names> </name></person-group><article-title>Centralized and federated models for the analysis of clinical data</article-title><source>Annu Rev Biomed Data Sci</source><year>2024</year><month>08</month><volume>7</volume><issue>1</issue><fpage>179</fpage><lpage>199</lpage><pub-id pub-id-type="doi">10.1146/annurev-biodatasci-122220-115746</pub-id><pub-id pub-id-type="medline">38723657</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Teo</surname><given-names>ZL</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Federated machine learning in healthcare: a systematic review on clinical applications and technical architecture</article-title><source>Cell Rep Med</source><year>2024</year><month>02</month><day>20</day><volume>5</volume><issue>2</issue><fpage>101419</fpage><pub-id pub-id-type="doi">10.1016/j.xcrm.2024.101419</pub-id><pub-id pub-id-type="medline">38340728</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Page</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Bossuyt</surname><given-names>PM</given-names> 
</name><etal/></person-group><article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title><source>BMJ</source><year>2021</year><month>03</month><day>29</day><volume>372</volume><fpage>n71</fpage><pub-id pub-id-type="doi">10.1136/bmj.n71</pub-id><pub-id pub-id-type="medline">33782057</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Debray</surname><given-names>TPA</given-names> </name><name name-style="western"><surname>Damen</surname><given-names>JAAG</given-names> </name><name name-style="western"><surname>Snell</surname><given-names>KIE</given-names> </name><etal/></person-group><article-title>A guide to systematic review and meta-analysis of prediction model performance</article-title><source>BMJ</source><year>2017</year><month>01</month><day>5</day><volume>356</volume><fpage>i6460</fpage><pub-id pub-id-type="doi">10.1136/bmj.i6460</pub-id><pub-id pub-id-type="medline">28057641</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><name name-style="western"><surname>Wolff</surname><given-names>RF</given-names> </name><name name-style="western"><surname>Riley</surname><given-names>RD</given-names> </name><etal/></person-group><article-title>PROBAST: a tool to assess risk of bias and applicability of prediction model studies: explanation and elaboration</article-title><source>Ann Intern Med</source><year>2019</year><month>01</month><day>1</day><volume>170</volume><issue>1</issue><fpage>W1</fpage><lpage>W33</lpage><pub-id pub-id-type="doi">10.7326/M18-1377</pub-id><pub-id pub-id-type="medline">30596876</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Fernandez-Felix</surname><given-names>BM</given-names> </name><name name-style="western"><surname>L&#x00F3;pez-Alcalde</surname><given-names>J</given-names> </name><name name-style="western"><surname>Roqu&#x00E9;</surname><given-names>M</given-names> </name><name name-style="western"><surname>Muriel</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zamora</surname><given-names>J</given-names> </name></person-group><article-title>CHARMS and PROBAST at your fingertips: a template for data extraction and risk of bias assessment in systematic reviews of predictive models</article-title><source>BMC Med Res Methodol</source><year>2023</year><month>02</month><day>17</day><volume>23</volume><issue>1</issue><fpage>44</fpage><pub-id pub-id-type="doi">10.1186/s12874-023-01849-0</pub-id><pub-id pub-id-type="medline">36800933</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><article-title>PROBAST: comprehensive and easy to use criteria and tool to assess the trustworthiness, value, fairness, quality, risk of bias and applicability of any type of prediction algorithm or model, including AI based models</article-title><source>PROBAST</source><access-date>2025-06-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.probast.org/">https://www.probast.org/</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Budrionis</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Benchmarking PySyft federated learning framework on MIMIC-III dataset</article-title><source>IEEE Access</source><year>2021</year><volume>9</volume><fpage>116869</fpage><lpage>116878</lpage></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Shea</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Qian</surname><given-names>H</given-names> </name><name name-style="western"><surname>Masurkar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>H</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>D</given-names> </name></person-group><article-title>Patient clustering improves efficiency of federated machine learning to predict mortality and hospital stay time using distributed electronic medical records</article-title><source>J Biomed Inform</source><year>2019</year><month>11</month><volume>99</volume><fpage>103291</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2019.103291</pub-id><pub-id pub-id-type="medline">31560949</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kerkouche</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Privacy-preserving and bandwidth-efficient federated learning: an application to in-hospital mortality prediction</article-title><year>2021</year><conf-name>Proceedings of the Conference on Health, Inference, and Learning</conf-name><conf-date>Apr 8-10, 2021</conf-date><conf-loc>Virtual Event, USA</conf-loc><publisher-name>Association for Computing Machinery</publisher-name><fpage>25</fpage><lpage>35</lpage><pub-id pub-id-type="doi">10.1145/3450439.3451859</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Ning</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ong</surname><given-names>MEH</given-names> </name><etal/></person-group><article-title>FedScore: a privacy-preserving framework for federated scoring system development</article-title><source>J Biomed Inform</source><year>2023</year><month>10</month><volume>146</volume><fpage>104485</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2023.104485</pub-id><pub-id pub-id-type="medline">37660960</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Pfitzner</surname><given-names>B</given-names> </name><name name-style="western"><surname>Maurer</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Winter</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Differentially-private federated learning with non-IID data for surgical risk prediction</article-title><year>2024</year><conf-name>2024 IEEE First International Conference on Artificial Intelligence for Medicine, Health and Care (AIMHC)</conf-name><conf-date>Feb 5-7, 2024</conf-date><conf-loc>Laguna Hills, CA, USA</conf-loc><pub-id pub-id-type="doi">10.1109/AIMHC59811.2024.00030</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Randl</surname><given-names>K</given-names> </name><name name-style="western"><surname>Llad&#x00F3;s Armengol</surname><given-names>N</given-names> </name><name name-style="western"><surname>Mondrejevski</surname><given-names>L</given-names> </name><name name-style="western"><surname>Miliou</surname><given-names>I</given-names> </name></person-group><article-title>Early prediction of the risk of ICU mortality with deep federated 
learning</article-title><year>2023</year><conf-name>2023 IEEE 36th International Symposium on Computer-Based Medical Systems (CBMS)</conf-name><conf-date>Jun 22-24, 2023</conf-date><conf-loc>L&#x2019;Aquila, Italy</conf-loc><pub-id pub-id-type="doi">10.1109/CBMS58004.2023.00304</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shiri</surname><given-names>I</given-names> </name><name name-style="western"><surname>Salimi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sirjani</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Differential privacy preserved federated learning for prognostic modeling in COVID-19 patients using large multi-institutional chest CT dataset</article-title><source>Med Phys</source><year>2024</year><month>07</month><volume>51</volume><issue>7</issue><fpage>4736</fpage><lpage>4747</lpage><pub-id pub-id-type="doi">10.1002/mp.16964</pub-id><pub-id pub-id-type="medline">38335175</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>J</given-names> </name></person-group><article-title>Federated-learning-based prognosis assessment model for acute pulmonary thromboembolism</article-title><source>BMC Med Inform Decis Mak</source><year>2024</year><month>05</month><day>27</day><volume>24</volume><issue>1</issue><fpage>141</fpage><pub-id 
pub-id-type="doi">10.1186/s12911-024-02543-x</pub-id><pub-id pub-id-type="medline">38802861</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vaid</surname><given-names>A</given-names> </name><name name-style="western"><surname>Jaladanki</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Federated learning of electronic health records to improve mortality prediction in hospitalized patients with COVID-19: machine learning approach</article-title><source>JMIR Med Inform</source><year>2021</year><month>01</month><day>27</day><volume>9</volume><issue>1</issue><fpage>e24207</fpage><pub-id pub-id-type="doi">10.2196/24207</pub-id><pub-id pub-id-type="medline">33400679</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Shin</surname><given-names>SY</given-names> </name></person-group><article-title>Federated learning on clinical benchmark data: performance assessment</article-title><source>J Med Internet Res</source><year>2020</year><month>10</month><day>26</day><volume>22</volume><issue>10</issue><fpage>e20891</fpage><pub-id pub-id-type="doi">10.2196/20891</pub-id><pub-id pub-id-type="medline">33104011</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Jeong</surname><given-names>E</given-names> </name><name name-style="western"><surname>Oh</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Communication-efficient on-device machine learning: federated distillation and 
augmentation under non-IID private data</article-title><comment>Preprint posted online on 2018</comment><pub-id pub-id-type="doi">10.48550/arXiv.1811.11479</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>L</given-names> </name><etal/></person-group><article-title>LoAdaBoost: loss-based AdaBoost federated machine learning on medical data</article-title><comment>Preprint posted online on 2018</comment><pub-id pub-id-type="doi">10.48550/arXiv.1811.12629</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rauniyar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hagos</surname><given-names>DH</given-names> </name><name name-style="western"><surname>Jha</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Federated learning for medical applications: a taxonomy, current trends, challenges, and future research directions</article-title><source>IEEE Internet Things J</source><year>2024</year><volume>11</volume><issue>5</issue><fpage>7374</fpage><lpage>7398</lpage><pub-id pub-id-type="doi">10.1109/JIOT.2023.3329061</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dayan</surname><given-names>I</given-names> </name><name name-style="western"><surname>Roth</surname><given-names>HR</given-names> </name><name name-style="western"><surname>Zhong</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Federated learning for predicting clinical outcomes in patients with COVID-19</article-title><source>Nat 
Med</source><year>2021</year><month>10</month><volume>27</volume><issue>10</issue><fpage>1735</fpage><lpage>1743</lpage><pub-id pub-id-type="doi">10.1038/s41591-021-01506-3</pub-id><pub-id pub-id-type="medline">34526699</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>T</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name></person-group><article-title>Treatment and prognosis of COVID&#x2011;19: current scenario and prospects (review)</article-title><source>Exp Ther Med</source><year>2021</year><volume>20</volume><issue>6</issue><fpage>1</fpage><lpage>1</lpage><pub-id pub-id-type="doi">10.3892/etm.2020.9435</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nampalle</surname><given-names>KB</given-names> </name><name name-style="western"><surname>Manhas</surname><given-names>S</given-names> </name><name name-style="western"><surname>Raman</surname><given-names>B</given-names> </name></person-group><article-title>Medical image security and authenticity via dual encryption</article-title><source>Appl Intell</source><year>2023</year><month>09</month><volume>53</volume><issue>17</issue><fpage>20647</fpage><lpage>20659</lpage><pub-id pub-id-type="doi">10.1007/s10489-023-04550-3</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Khanna</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schaffer</surname><given-names>V</given-names> </name><name 
name-style="western"><surname>Gursoy</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gerstein</surname><given-names>M</given-names> </name></person-group><article-title>Privacy-preserving model training for disease prediction using federated learning with differential privacy</article-title><year>2022</year><conf-name>2022 44th Annual International Conference of the IEEE Engineering in Medicine &#x0026; Biology Society (EMBC)</conf-name><conf-date>Jul 11-15, 2022</conf-date><conf-loc>Glasgow, Scotland, United Kingdom</conf-loc><pub-id pub-id-type="doi">10.1109/EMBC48229.2022.9871742</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Pfohl</surname><given-names>SR</given-names> </name><etal/></person-group><article-title>Federated and differentially private learning for electronic health records</article-title><comment>Preprint posted online on 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1911.05861</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Gu</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Efficiently modeling long sequences with structured state spaces</article-title><comment>Preprint posted online on 2022</comment><pub-id pub-id-type="doi">10.48550/arXiv.2111.00396</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Federated learning with non-IID data</article-title><comment>Preprint posted online on 2022</comment><pub-id pub-id-type="doi">10.48550/arXiv.1806.00582</pub-id></nlm-citation></ref><ref 
id="ref39"><label>39</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Mondrejevski</surname><given-names>L</given-names> </name><name name-style="western"><surname>Miliou</surname><given-names>I</given-names> </name><name name-style="western"><surname>Montanino</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pitts</surname><given-names>D</given-names> </name><name name-style="western"><surname>Hollmen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Papapetrou</surname><given-names>P</given-names> </name></person-group><article-title>FLICU: a federated learning workflow for intensive care unit mortality prediction</article-title><year>2022</year><conf-name>2022 IEEE 35th International Symposium on Computer-Based Medical Systems (CBMS)</conf-name><conf-date>Jul 21-23, 2022</conf-date><conf-loc>Shenzhen, China</conf-loc><pub-id pub-id-type="doi">10.1109/CBMS55023.2022.00013</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Briggs</surname><given-names>C</given-names> </name><name name-style="western"><surname>Fan</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Andras</surname><given-names>P</given-names> </name></person-group><article-title>Federated learning with hierarchical clustering of local updates to improve training on non-IID data</article-title><year>2020</year><conf-name>2020 International Joint Conference on Neural Networks (IJCNN)</conf-name><conf-date>Jul 19-24, 2020</conf-date><conf-loc>Glasgow, United Kingdom</conf-loc><pub-id pub-id-type="doi">10.1109/IJCNN48605.2020.9207469</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Andaur 
Navarro</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Damen</surname><given-names>JAA</given-names> </name><name name-style="western"><surname>Takada</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Risk of bias in studies on prediction models developed using supervised machine learning techniques: systematic review</article-title><source>BMJ</source><year>2021</year><month>10</month><day>20</day><volume>375</volume><fpage>n2281</fpage><pub-id pub-id-type="doi">10.1136/bmj.n2281</pub-id><pub-id pub-id-type="medline">34670780</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Frondelius</surname><given-names>T</given-names> </name><name name-style="western"><surname>Atkova</surname><given-names>I</given-names> </name><name name-style="western"><surname>Miettunen</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Early prediction of ventilator-associated pneumonia with machine learning models: a systematic review and meta-analysis of prediction model performance</article-title><source>Eur J Intern Med</source><year>2024</year><month>03</month><volume>121</volume><fpage>76</fpage><lpage>87</lpage><pub-id pub-id-type="doi">10.1016/j.ejim.2023.11.009</pub-id><pub-id pub-id-type="medline">37981529</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Search strategy.</p><media xlink:href="jmir_v27i1e65708_app1.docx" xlink:title="DOCX File, 18 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Prediction model study Risk Of Bias Assessment Tool (PROBAST) Signaling Question.</p><media xlink:href="jmir_v27i1e65708_app2.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material 
id="app3"><label>Multimedia Appendix 3</label><p>Description of the included articles and full details of meta-analyses.</p><media xlink:href="jmir_v27i1e65708_app3.docx" xlink:title="DOCX File, 26 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Model summary and approximation formula.</p><media xlink:href="jmir_v27i1e65708_app4.docx" xlink:title="DOCX File, 497 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Risk of bias assessment by CHARMS (Checklist for Critical Appraisal and Data Extraction for Systematic Reviews of Prediction Modeling Studies) and PROBAST (Prediction model study Risk Of Bias Assessment Tool) guidelines.</p><media xlink:href="jmir_v27i1e65708_app5.docx" xlink:title="DOCX File, 29 KB"/></supplementary-material><supplementary-material id="app6"><label>Checklist 1</label><p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) checklist.</p><media xlink:href="jmir_v27i1e65708_app6.docx" xlink:title="DOCX File, 32 KB"/></supplementary-material></app-group></back></article>