<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e48595</article-id>
      <article-id pub-id-type="pmid">39079116</article-id>
      <article-id pub-id-type="doi">10.2196/48595</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Early Detection of Pulmonary Embolism in a General Patient Population Immediately Upon Hospital Admission Using Machine Learning to Identify New, Unidentified Risk Factors: Model Development Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>de Azevedo Cardoso</surname>
            <given-names>Taiane</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Leung</surname>
            <given-names>Tiffany</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gartner</surname>
            <given-names>Daniel</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Rajendran</surname>
            <given-names>Jai Hariprasad</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zeng</surname>
            <given-names>Juntong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ben Yehuda</surname>
            <given-names>Ori</given-names>
          </name>
          <degrees>BSc, MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-8957-1493</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Itelman</surname>
            <given-names>Edward</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0142-0253</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Vaisman</surname>
            <given-names>Adva</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2041-0712</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Segal</surname>
            <given-names>Gad</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3851-3245</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Lerner</surname>
            <given-names>Boaz</given-names>
          </name>
          <degrees>BA, MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Industrial Engineering and Management</institution>
            <institution>Ben-Gurion University of the Negev</institution>
            <addr-line>POB 653</addr-line>
            <addr-line>Beer-Sheva, 84105</addr-line>
            <country>Israel</country>
            <phone>972 544399763</phone>
            <email>boaz@bgu.ac.il</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0165-3663</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Industrial Engineering and Management</institution>
        <institution>Ben-Gurion University of the Negev</institution>
        <addr-line>Beer-Sheva</addr-line>
        <country>Israel</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Education Authority</institution>
        <institution>Chaim Sheba Medical Center</institution>
        <institution>Faculty of Health Science and Medicine, Tel-Aviv University</institution>
        <addr-line>Tel-Aviv</addr-line>
        <country>Israel</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Cardiology Division, Rabin Medical Center</institution>
        <addr-line>Petach-Tikva</addr-line>
        <country>Israel</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Boaz Lerner <email>boaz@bgu.ac.il</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>30</day>
        <month>7</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e48595</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>10</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>12</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>30</day>
          <month>4</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Ori Ben Yehuda, Edward Itelman, Adva Vaisman, Gad Segal, Boaz Lerner. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 30.07.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e48595" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Under- or late identification of pulmonary embolism (PE)—a thrombosis of 1 or more pulmonary arteries that seriously threatens patients’ lives—is a major challenge confronting modern medicine.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to establish accurate and informative machine learning (ML) models to identify patients at high risk for PE as they are admitted to the hospital, before their initial clinical checkup, by using only the information in their medical records.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We collected demographics, comorbidities, and medications data for 2568 patients with PE and 52,598 control patients. We focused on data available prior to emergency department admission, as these are the most universally accessible data. We trained an ML random forest algorithm to detect PE at the earliest possible time during a patient’s hospitalization—at the time of his or her admission. We developed and applied 2 ML-based methods specifically to address the data imbalance between PE and non-PE patients, which causes misdiagnosis of PE.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The resulting models predicted PE based on age, sex, BMI, past clinical PE events, chronic lung disease, past thrombotic events, and usage of anticoagulants, obtaining an 80% geometric mean value for the PE and non-PE classification accuracies. Although on hospital admission only 4% (1942/46,639) of the patients had a diagnosis of PE, we identified 2 clustering schemes comprising subgroups with more than 61% (705/1120 in clustering scheme 1; 427/701 and 340/549 in clustering scheme 2) positive patients for PE. One subgroup in the first clustering scheme included 36% (705/1942) of all patients with PE who were characterized by a definite past PE diagnosis, a 6-fold higher prevalence of deep vein thrombosis, and a 3-fold higher prevalence of pneumonia, compared with patients of the other subgroups in this scheme. In the second clustering scheme, 2 subgroups (1 of only men and 1 of only women) included patients who all had a past PE diagnosis and a relatively high prevalence of pneumonia, and a third subgroup included only those patients with a past diagnosis of pneumonia.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study established an ML tool for early diagnosis of PE almost immediately upon hospital admission. Despite the highly imbalanced scenario undermining accurate PE prediction and using information available only from the patient’s medical history, our models were both accurate and informative, enabling the identification of patients already at high risk for PE upon hospital admission, even before the initial clinical checkup was performed. The fact that we did not restrict our patients to those at high risk for PE according to previously published scales (eg, Wells or revised Geneva scores) enabled us to accurately assess the application of ML on raw medical data and identify new, previously unidentified risk factors for PE, such as previous pulmonary disease, in general populations.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>pulmonary embolism</kwd>
        <kwd>deep vein thrombosis</kwd>
        <kwd>venous thromboembolism</kwd>
        <kwd>imbalanced data</kwd>
        <kwd>clustering</kwd>
        <kwd>risk factors</kwd>
        <kwd>Wells score</kwd>
        <kwd>revised Geneva score</kwd>
        <kwd>hospital admission</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Pulmonary Embolism Diagnosis—a Challenge Faced by Modern Medicine</title>
        <p>Pulmonary embolism (PE) occurs when a blood clot enters and blocks (either fully or partially) the pulmonary arteries, usually because of a dislodged thrombosis in the deeper veins of the lower limbs (also termed “deep vein thrombosis” [DVT]), which is an aspect of the venous thromboembolism (VTE) phenomenon [<xref ref-type="bibr" rid="ref1">1</xref>]. After myocardial infarction and cerebral stroke, PE is the third most common cause of death from cardiovascular diseases. The worldwide financial burden of PE is immense. Reports have found the estimated cost of a thromboembolic event in the United States to be US $3000 to US $10,000, the total annual cost related to VTE to be US $33,000, and that of VTE complications to be more than US $40,000, with the US health care system’s total annual expenses due to VTE reaching US $12 billion [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. This cost is not significantly different in Europe. By considering the general population admitted to the hospital, we can help reduce the mortality rate of later-diagnosed patients, employ early prevention means, and better use hospital resources [<xref ref-type="bibr" rid="ref4">4</xref>], thereby alleviating this financial burden and improving health care.</p>
      </sec>
      <sec>
        <title>Early Identification of Patients at Risk of PE</title>
        <p>Along the VTE continuum, there is a clear correlation between the rapidity of diagnosis and the risk of mortality [<xref ref-type="bibr" rid="ref5">5</xref>]. Various risk factors and symptoms of PE have been suggested over the years. In addition to previous cases, which increase the risk of recurrence, other patient characteristics associated with this condition include being older than 70 years, active malignancy, congestive heart failure, chronic obstructive pulmonary disease, systolic arterial hypertension, presenting with tachypnea, and right ventricular hypokinesis on echocardiography [<xref ref-type="bibr" rid="ref1">1</xref>]. New or worsening shortness of breath, chest pain, or sustained hypotension without an obvious alternative cause [<xref ref-type="bibr" rid="ref5">5</xref>], as well as cigarette smoking, diabetes, obesity, and any type of immobility, are also contributing factors [<xref ref-type="bibr" rid="ref6">6</xref>]. For suspected PE, hospital wards commonly use scoring methods designed to predict plausibility, such as the Wells score and the revised Geneva score, to decide whether to send a patient for confirmatory tests [<xref ref-type="bibr" rid="ref7">7</xref>]. However, it is difficult to compare the Wells score for PE with scoring methods that rely entirely on medical facts, and the revised Geneva score, which is based entirely on clinical variables rather than the experience of the physician, also requires a reasonable pretest probability or initial suspicion of PE. Moreover, while the former score is considered subjective, the latter is ineffective in safely reducing the number of unnecessary computed tomographic scans [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>In current practice, after a clinical probability assessment, the diagnostic workup should be tailored to the severity of the clinical presentation based on whether the patient’s condition is hemodynamically stable or unstable. In patients with hemodynamic stability, the diagnosis of PE should follow a sequential diagnostic workup potentially consisting of D-dimer blood testing and (if necessary) multidetector computed tomographic angiography (CTA; considered to be the gold standard diagnostic procedure for PE) or ventilation-perfusion scanning (mainly in cases where CTA is contraindicated). Patients with a high pretest probability for PE should not undergo D-dimer blood testing but should immediately be sent for a confirmatory test [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>However, many cases of PE are diagnosed late or misdiagnosed, and there are 2 main reasons for this. The first is that risk factors, which are presumably known, have yet to be proven connected to the disease, and the second is that the diagnostic methods currently used in hospitals are limited; they are not accurate, safe, or sufficiently available for routine use. Over the years, studies on PE have focused mainly on improving its diagnosis by analyzing CTA images, and when considering early diagnosis, the focus was on risk factors and symptoms that may indicate an elevated or high risk for PE.</p>
      </sec>
      <sec>
        <title>Prediction Tools for Patients at Risk of PE</title>
        <p>In recent years, research on prediction tools for PE has studied several aspects of the disease, for example, by suggesting clinical prediction rules (eg, the Wells and revised Geneva scores) [<xref ref-type="bibr" rid="ref9">9</xref>] and comparing them [<xref ref-type="bibr" rid="ref10">10</xref>], predicting PE in clinically suspected patients [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>], predicting adverse outcomes in PE [<xref ref-type="bibr" rid="ref13">13</xref>], and the application of statistical means for prediction [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>].</p>
        <p>In addition, with the advent of artificial intelligence (AI) as a predictive tool, several studies applied AI to the diagnosis of PE. For example, 1 study [<xref ref-type="bibr" rid="ref14">14</xref>] used AI and non-AI tools to predict PE-imaging outcomes based on the patient’s electronic medical record to provide a patient-specific risk score for those referred for computed tomography. Other studies used feature engineering and an artificial neural network to predict PE [<xref ref-type="bibr" rid="ref15">15</xref>] or clustering to differentiate typical clusters of patients [<xref ref-type="bibr" rid="ref16">16</xref>]. Note, however, that all of these studies, like many others, were performed using the data of patients during their hospital stay.</p>
        <p>In a recent study, Ryan et al [<xref ref-type="bibr" rid="ref17">17</xref>] devised an AI algorithm that predicts the chances for PE during hospitalization. The algorithm used data from laboratory tests conducted during the patient's hospital stay and achieved performance values of 81% sensitivity, 35%-70% specificity, and an area under the curve of 0.67-0.85. However, their approach does not meet a challenge faced by clinicians, which is the need to make a diagnosis immediately upon a patient’s emergency department presentation when clinical data are only partially available and laboratory test results are still unavailable. Moreover, if we use the performance values above to calculate the positive predictive value (PPV), that is, precision, measuring the chances of a true-positive result of all results predicted as positive (which was not reported), we obtain a value of 1.8%, which reflects too many false positives (ie, on average, only 1.8 of 100 subjects predicted as patients with PE are real patients). A system based on such an algorithm will fail to become a practical diagnostic tool, since it did not address the very high inherent imbalance in the problem in favor of non-PE (ie, the very low prevalence of 309 patients with PE among the 60,297 patients tested [<xref ref-type="bibr" rid="ref17">17</xref>]).</p>
        <p>Another recent study by Shen et al [<xref ref-type="bibr" rid="ref18">18</xref>] concentrated on data collected during the first 3 hours of hospitalization, mainly vital signs and laboratory test results, from high-risk patients (patients who recently had heart failure, have a history of specific diseases, or currently have cancer, risk factors that are also included in the Wells and Geneva screening scores). Although we acknowledge the potential value of this study, we question its applicability to the general population of hospital admissions. First, we note that making PE predictions for patients who have already met screening score requirements and are at high risk for PE (and failing to detect those who have not), and collecting data after patients’ hospitalization, are not relevant when the aim is to screen the general population immediately upon admission. Second, although predictions are based on patients who have already met screening score requirements and are at high risk for PE and based on data after patients’ hospitalization, the PPV performance reported is only 20%, again due to a failure to consider the imbalanced inhospital data (3% PE prevalence), which also makes the models proposed by the authors less practical for PE screening on hospital admission.</p>
        <p>In contrast, this study focuses on the identification of patients at high risk for PE in the general patient population presenting at emergency departments, using only their demographic and medical history data, prior to the generation of any inhospital data, and using 2 methodologies to tackle the imbalance in PE prevalence in hospital admission data.</p>
      </sec>
      <sec>
        <title>The Aim of This Study</title>
        <p>Our study had three main goals: (1) to identify new PE risk factors, among those available to the medical staff upon hospital admission, that may have been overlooked; (2) to accurately predict PE as early as possible upon patient admission, as assessed by all performance measures (including the PPV), in order to create a practical diagnostic tool for PE; and (3) to group patients using the newly identified and already known risk factors in order to find subgroups of patients who are at higher risk of PE upon hospital admission. That is, the main goals of this study were not only to establish an AI tool for early diagnosis of PE but also to identify new, previously unidentified risk factors for PE that clinicians should have in mind when caring for their general population of patients—and not only those already defined as having high risk of PE—and to use these risk factors to identify informative subgroups of patients at risk of PE upon hospital admission. The fact that we did not restrict our patients to those with high risk for PE according to previously published scales (eg, Wells and revised Geneva scores) enabled us to accurately assess the application of our AI algorithms on raw medical data.</p>
        <p>There are 3 novelties in our study. First, unlike previous studies, our study concentrates on the data of a general population of patients admitted to tertiary care and not just those with a high probability or suspicion of PE. Since straightforward modeling of PE is prone to imbalance due to the low disease prevalence among the admitted patient population, where only a minority of patients admitted to the hospital will later be diagnosed with PE, our second novelty is in proposing 2 methodologies aimed at overcoming the highly imbalanced scenario that provide reasonable PPV values. The third novelty is in a new performance measure we apply to a conventional clustering algorithm to identify clusters that maximize the minority-to-majority (PE to non-PE) ratio and thereby help us focus on PE risk factors and groups of patients at risk on hospital admission from the additional angle of a clustering measure.</p>
        <p>In the sections that follow, we describe the data used in our study and how we addressed the imbalance challenge (“Methods” section), our modeling methods (“A Methodology for Meeting Our 3 Main Goals on Hospital Admission” section), and derived results (“Results” section) before summarizing and proposing future research directions (“Discussion” section).</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>In this section, we present the patient cohort and database, along with our data understanding and preparation methods. Our methods for tackling imbalance, which are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref34">34</xref>], use the features and past diagnoses that we identify in this section as being most effective in differentiating PE from control patients.</p>
      <sec>
        <title>Patients</title>
        <p>The data were obtained by Sheba Medical Center (SMC) using the MDClone ADAMS Platform, maximizing collaboration with synthetic data while maintaining patient privacy and maximizing data use [<xref ref-type="bibr" rid="ref35">35</xref>]. Balancing patient privacy, legal, compliance, and security issues often hinders individuals, teams, and organizations from working together to share data. However, the MDClone ADAMS Platform overcomes these common obstacles by allowing users to access and share data and information across both internal and external entities with synthetic data safely and securely. It provides data not only for research purposes but also for real-time clinical applications, which ensures the high reliability of the extracted data. Several publications have validated the usage of MDClone-generated synthetic data for epidemiological studies. In 1 study, Foraker et al [<xref ref-type="bibr" rid="ref36">36</xref>] thoroughly compared the synthetic and real data of septic patients and did not find differences in the statistical conclusions derived. Benaim et al [<xref ref-type="bibr" rid="ref37">37</xref>] also validated the application of MDClone-generated synthetic data in 5 observational studies and concluded that predictability was highly preserved. In this study, 2600 patients diagnosed with PE and 53,250 patients with non-PE diagnoses (controls) were represented by their demographic, clinical, laboratory test results, and medical history data.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The synthetic data were sampled from the real-world, raw data distribution such that the statistical characteristics of their distribution replicate those of the true distribution and do not contain identifiable information included in the real data. Clinical events in the synthetic samples are slightly and randomly shifted in time to avoid the possibility of de-anonymizing the data. To protect patient privacy, the SMC database consists of synthetic data generated from the data of all patients who were hospitalized in the center’s internal medicine departments between 2008 and 2020. Patients’ data were retrieved, and the MDClone system was used after the Sheba Medical Center institutional review board approval of this study (7864-20-SMC).</p>
      </sec>
      <sec>
        <title>Data Understanding and Preparation</title>
        <p>We suggest a model capable of predicting PE in a patient upon his or her admission to the hospital, based only on the data available in the patient’s electronic medical record before admission. We focus exclusively on data available prior to emergency department admission, as these are the most universally accessible data worldwide. This approach allowed us to overcome the challenge stemming from variability in hospital-specific clinical decisions and routines, which could limit the broader applicability of our findings. We collected demographics, comorbidities, and chronic medications data for 2568 patients with PE and 52,598 control patients. While patient records for the age and sex variables were complete, the BMI variable was frequently missing; thus, we removed records with missing BMI values. This left us with the data of 1942 patients with PE (PE=1) and 44,697 control patients (PE=0). <xref ref-type="table" rid="table1">Table 1</xref> provides some descriptive statistics for age, sex, and BMI for the patients with PE and controls. Since the 2 populations were of unequal sample sizes and variances, to check significance, we applied a Welch’s <italic>t</italic> test (unequal variance 2-tailed <italic>t</italic> test). According to this test, patients with PE were significantly older (mean 69.4, SD 16.8 vs mean 65.3, SD 17.5; <italic>P</italic>&#60;.001) and significantly more obese (mean 27.2, SD 5.9 vs mean 26.8, SD 5.3; <italic>P</italic>&#60;.001). Moreover, female patients were diagnosed with PE significantly more often than male patients (1097/1942, 56.5% vs 845/1942, 43.5%; <italic>P</italic>&#60;.001). Note, however, that our data were obtained for all patients admitted to the internal department of SMC, and, thus, case subjects and control subjects were not matched, which may explain some of the differences between the 2 groups.</p>
        <p>Since the time of past diagnoses is not always recorded in hospital records, and sometimes it is even based on patient memory, we ignored this parameter and treated each of these clinical events as a binary variable indicating whether a patient had received this diagnosis in the past. Then we merged 19 past diagnoses into 9 categories of diagnostic “families,” as is frequently done. In addition to the clinical motivation, this merging also helped us unify sparse diagnoses into denser categories. We treated past PE as a stand-alone category and did not associate it with the thrombosis category, as would be customary, because it is a known risk factor for PE, and we wanted to examine it as a “stand-alone” diagnosis in our model. Also, we converted the only chronic medication variable existing in our data, the registration date of a patient receiving treatment with anticoagulants, into a binary variable indicating whether the patient had received such treatment in the past. <xref ref-type="table" rid="table2">Table 2</xref> shows the prevalence of the 9 categories of past diagnoses and that of anticoagulant use for the 2 patient groups. Except for the gastrointestinal past diagnosis, the differences in prevalence between the 2 groups for all past diagnoses are statistically significant according to the Welch’s <italic>t</italic> test.</p>
        <p>Using the PrefixSpan algorithm, which is designed to discover sequential patterns in sequence databases [<xref ref-type="bibr" rid="ref38">38</xref>], we looked for common combinations of 2 or more past diagnoses (plus the single variable indicating the use of anticoagulants) in each of the PE and control patient groups. In general, we found that the patients with PE had a richer background of past diagnoses than the control patients, with only 15.7% (305/1942) of them not having any past diagnosis, compared with 46.8% (20,918/44,697) of the control patients (<italic>P</italic>&#60;.001). In addition, we found that the 2 patient groups showed a high incidence of cardiovascular disease in the past. As for the combinations, pulmonary and cardiovascular background diagnoses were the most common, with 13.1% (254/1942) of the patients with PE having both (compared with only 3.1% [1386/44,697] of the control patients), while the combination of cardiovascular and past PE affected 9.1% (177/1942; 447/44,697, 1% of the control group), pulmonary and past PE affected 9% (175/1942; 447/44,697, 1% of the control group), and cardiovascular and anticoagulant usage affected 7.3% (142/1942; 1609/44,697, 3.6% of the control group). We also identified 16 combinations of 3 variables and 1 combination of 4 variables (anticoagulants, pulmonary, cardiovascular, and past PE) affecting more than 1% (20/1942) of patients with PE. It is notable that the control patients did not have a single combination of 3 or more variables in more than 1% of the population. <xref rid="figure1" ref-type="fig">Figure 1</xref> presents the prevalence (%) of past diagnoses and chronic medications for PE and control patients for single diagnoses, pairs of past diagnoses, and combinations of 3 past diagnoses. 
The past diagnoses identified here served as the basis for patient representation when tackling imbalance in training and testing PE classifiers (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Descriptive statistics for patients with pulmonary embolism (PE) and control patients.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="360"/>
            <col width="130"/>
            <col width="170"/>
            <col width="0"/>
            <col width="310"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Variable</td>
                <td colspan="3">Sex distribution</td>
                <td><italic>P</italic> value</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>PE<sup>a</sup></td>
                <td colspan="2">Control</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Sex (%)</bold>
                </td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>43.5</td>
                <td>55.4</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>56.5</td>
                <td>44.6</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Age (years), mean (SD)</td>
                <td>69.4 (16.8)</td>
                <td colspan="2">65.3 (17.5)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">BMI, mean (SD)</td>
                <td>27.2 (5.9)</td>
                <td colspan="2">26.8 (5.3)</td>
                <td>.003</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>PE: pulmonary embolism.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Composition and prevalence of past diagnosis categories and anticoagulant use in our database.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="290"/>
            <col width="340"/>
            <col width="80"/>
            <col width="100"/>
            <col width="190"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>Past diagnosis</td>
                <td colspan="2">Prevalence (%)</td>
                <td><italic>P</italic> value</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PE<sup>a</sup></td>
                <td>Control</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Surgery</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Past surgery documentation</p>
                    </list-item>
                    <list-item>
                      <p>Past surgery procedure</p>
                    </list-item>
                  </list>
                </td>
                <td>6.9</td>
                <td>5.3</td>
                <td>&#60;.01</td>
              </tr>
              <tr valign="top">
                <td>Pulmonary</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>Pleural effusion</p>
                    </list-item>
                    <list-item>
                      <p>Chronic obstructive pulmonary disease</p>
                    </list-item>
                    <list-item>
                      <p>Restrictive lung disease</p>
                    </list-item>
                  </list>
                </td>
                <td>30.2</td>
                <td>7.6</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Thrombosis</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Deep vein thrombosis</p>
                    </list-item>
                    <list-item>
                      <p>Coagulopathies</p>
                    </list-item>
                  </list>
                </td>
                <td>15.1</td>
                <td>3.3</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Cardiovascular</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Ischemic heart disease</p>
                    </list-item>
                    <list-item>
                      <p>Atrial fibrillation or atrial flutter</p>
                    </list-item>
                    <list-item>
                      <p>Congestive heart failure</p>
                    </list-item>
                  </list>
                </td>
                <td>33.6</td>
                <td>36</td>
                <td>.03</td>
              </tr>
              <tr valign="top">
                <td>Kidney</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Chronic kidney disease</p>
                    </list-item>
                  </list>
                </td>
                <td>8.8</td>
                <td>6.2</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Gastrointestinal</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Inflammatory bowel disease</p>
                    </list-item>
                  </list>
                </td>
                <td>0.9</td>
                <td>1.3</td>
                <td>.07</td>
              </tr>
              <tr valign="top">
                <td>Joints</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Rheumatoid arthritis</p>
                    </list-item>
                  </list>
                </td>
                <td>1.6</td>
                <td>0.7</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Neurologic or psychiatric</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Dementia</p>
                    </list-item>
                    <list-item>
                      <p>Mental disorders</p>
                    </list-item>
                    <list-item>
                      <p>Mood disorders</p>
                    </list-item>
                    <list-item>
                      <p>Psychosis</p>
                    </list-item>
                  </list>
                </td>
                <td>10.0</td>
                <td>4.7</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Past PE</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Past PE</p>
                    </list-item>
                  </list>
                </td>
                <td>39.5</td>
                <td>1.1</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Anticoagulants</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Anticoagulants</p>
                    </list-item>
                  </list>
                </td>
                <td>13.8</td>
                <td>6.1</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>PE: pulmonary embolism.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Prevalence for a single past diagnosis or medication in each of the study populations (top left), a pair of past diagnoses or medications in each population (top right), and a triple past diagnosis or medication in each population (bottom). PE: pulmonary embolism.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e48595_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>A Methodology for Meeting Our 3 Main Goals on Hospital Admission</title>
        <p>The 3 main goals of this study were to identify new PE risk factors among those available to the medical staff upon hospital admission that may have been overlooked, to accurately predict PE as early as possible upon patient admission, and to identify subgroups of patients who have the newly identified risk factors together with known risk factors and are at higher risk of PE. To achieve the first goal, we identified important features (previous diagnoses in the patient’s medical records) according to the classifier, validated this identification using an ablation study, and focused the analysis on important individual diagnoses that may be considered potential risk factors. To achieve the second goal, we used performance measures dedicated to imbalanced scenarios to estimate the ability of the 2 methodologies we suggested (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) and establish a common benchmark that would enable the comparison of the 2 methods. To achieve the third goal, we clustered the population on hospital admission to identify some homogeneous clusters that maximize the minority-to-majority (PE to non-PE) ratio and then focused on risk factors that established these risk groups.</p>
      </sec>
      <sec>
        <title>Feature Importance—Initial Identification of Potential Risk Factors</title>
        <p>We used a random forest (RF) consisting of a collection of tree-structured classifiers, each trained on bootstrapped samples of the training data, that randomly searches across subsets of the 13 input variables (the 10 in <xref ref-type="table" rid="table2">Table 2</xref> plus the 3 in <xref ref-type="table" rid="table1">Table 1</xref>) to determine the forest’s trees. The output of the classifier is a majority vote of the trees. To maximize its performance, we adjusted 3 hyperparameters of the RF: maximum depth—the longest path possible from a tree root to a tree leaf; minimum sample split—the minimal number of samples required to split an internal node; and the split criterion—either the Gini impurity or the information gain [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. Our prior research demonstrates the accuracy and efficiency of the RF classifier in several clinical domains [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref45">45</xref>]. To increase confidence in the classifiers obtained by methods 1 and 2 (a description of the methods is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), we repeated the evaluation of the methods over 10 data permutations and reported the averaged performance.</p>
        <p>The RF model measures the contribution (ie, importance) of its input variables (features) to classification using the Gini index, measuring the chance of misclassifying a random record based on the a priori probabilities of each class in a particular split in a tree [<xref ref-type="bibr" rid="ref39">39</xref>]. However, evaluation over 10 data permutations yielded 10 lists of feature importance for each method. Therefore, to determine feature importance, we proposed to evaluate the statistical difference between 10 feature lists of both methods using a Friedman-Nemenyi test [<xref ref-type="bibr" rid="ref46">46</xref>], as described in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Accurate PE Prediction Under the Imbalanced Scenario</title>
        <p>To perform an in-depth comparison of the performance of methods 1 and 2, we needed to evaluate them using the same data in both the training and test sets, because method 1 uses more control subjects than method 2. Therefore, we evaluated the methods using a positive (PE) class measure, the true-positive rate (TPR), and divided the data into training and test sets in 2 steps (<xref rid="figure2" ref-type="fig">Figure 2</xref>):</p>
        <list list-type="order">
          <list-item>
            <p>We created the data sets for method 2 by creating a balanced test set: 20% of the PE subjects and the same number of control subjects and left the remaining subjects for training.</p>
          </list-item>
          <list-item>
            <p>We created the data sets for method 1 by keeping the same PE test and training sets used in method 2. Then, we balanced the remaining control patients (beyond those used in method 2) between the control training and test sets to include 80% and 20% of the original control data set, respectively.</p>
          </list-item>
        </list>
        <p>To focus the evaluation of methods 1 and 2 on their performance on patients with PE, we compared the TPR values of the 2 methods using a univariate 2-tailed <italic>t</italic> test [<xref ref-type="bibr" rid="ref47">47</xref>].</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Division into training and test sets to compare methods 1 and 2. X and Y are the numbers of patients equal to 20% of the PE and control sets, respectively. PE: pulmonary embolism.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e48595_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Clustering to Informative Homogeneous Subgroups for PE</title>
        <p>To add an additional angle to our study on the characteristics of patients at risk for PE on hospital admission, we wanted to find subgroups that were as homogeneous as possible for PE using a different performance measure than those already used. To achieve this goal, first, we relaxed the categorization of the 19 indications into the 9 categories we created earlier to examine whether there were specific past diagnoses that had not been detected in the process performed so far that may be considered as potential risk factors for PE. Second, we established different possible feature representations for patients based on all combinations of features (past diagnoses and demographics data). Third, we proposed maximizing a performance measure that accounts for the percentages of patients with PE in a clustering scheme’s clusters to simultaneously solve 2 patient representation problems that are usually solved separately, even though the optimization of each depends on the results of the other. These are the optimal feature representation and clustering scheme (number of clusters). Using this measure and the <italic>K</italic>-means algorithm, we clustered patients while considering different patient representations (combinations of possible feature representations and possible clustering schemes with <italic>K</italic>={2, 5}) to derive that which yields clusters that maximize the percentage of patients with PE compared with the PE prevalence in the data (1942/46,639, 4.16%).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We evaluated our methodology under the imbalanced scenario according to five criteria: (1) Gini-based feature significance, (2) classification performance measures, (3) ablation-based feature significance, (4) TPR-driven maximization, and (5) identification of potential risk factors.</p>
      <sec>
        <title>Gini-Based Feature Significance</title>
        <p>Using 10 data permutations, we applied methods 1 and 2 (Figures S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) to predict PE and obtained 10 feature importance lists for the 13 features for each method. Testing each method separately using a Friedman-Nemenyi test, past PE was ranked the most significant by both methods, and age, BMI, pulmonary, and past thrombosis (<xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>) were ranked not significantly differently than past PE by both methods, as well as anticoagulant usage for method 1 and sex for method 2. The other 6 features (ie, past diagnoses: surgery, cardiovascular, kidney, gastrointestinal, joints, and neurologic or psychiatric) were ranked significantly differently than past PE.</p>
      </sec>
      <sec>
        <title>Classification Performance Measures</title>
        <p><xref ref-type="table" rid="table3">Tables 3</xref> and <xref ref-type="table" rid="table4">4</xref> present confusion matrices for methods 1 and 2, respectively, and <xref ref-type="table" rid="table5">Table 5</xref> presents performance measures of the best model based on the 7 most important features for each method (derived as described in the “Gini-Based Feature Significance” section) averaged over 10 data permutations. The tables demonstrate that method 1 achieved higher performance values than method 2 on most measures (and similar true-negative values), except for precision and the <italic>F</italic><sub>1</sub>-score. The very low precision value and very high negative predictive values of method 1 suggest that the decision threshold that was selected by this method is low, allowing more subjects to pass the threshold, among them (because of the imbalance) more controls that are thus incorrectly predicted as PE, reducing precision. On the other hand, this “low” threshold guarantees that almost all subjects that are identified as controls are indeed controls, increasing the negative predictive values to almost a perfect value. This “low” threshold also ensures predicting more patients with PE as PE and fewer as controls, both of which increase the TPR. These mechanisms represent a trade-off in the calculation of the main performance measure, as is expected in imbalanced scenarios.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Method 1—confusion matrix.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>True negative</td>
                <td>True positive</td>
                <td>Total predicted</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Predicted negative</td>
                <td>0.83</td>
                <td>0.01</td>
                <td>0.84</td>
              </tr>
              <tr valign="top">
                <td>Predicted positive</td>
                <td>0.13</td>
                <td>0.03</td>
                <td>0.16</td>
              </tr>
              <tr valign="top">
                <td>Total true</td>
                <td>0.96</td>
                <td>0.04</td>
                <td>1</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Method 2—confusion matrix.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>True negative</td>
                <td>True positive</td>
                <td>Total predicted</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Predicted negative</td>
                <td>0.44</td>
                <td>0.16</td>
                <td>0.6</td>
              </tr>
              <tr valign="top">
                <td>Predicted positive</td>
                <td>0.06</td>
                <td>0.34</td>
                <td>0.4</td>
              </tr>
              <tr valign="top">
                <td>Total true</td>
                <td>0.5</td>
                <td>0.5</td>
                <td>1</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Performance measures (the values of measures for the best method are italicized).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="120"/>
            <col width="120"/>
            <col width="110"/>
            <col width="110"/>
            <col width="110"/>
            <col width="110"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Measure</td>
                <td>Accuracy</td>
                <td>Precision</td>
                <td>TPR<sup>a</sup></td>
                <td>TNR<sup>b</sup></td>
                <td>NPV<sup>c</sup></td>
                <td><italic>F</italic><sub>1</sub>-score</td>
                <td>GM<sup>d</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Method 1</td>
                <td>
                  <italic>0.86</italic>
                </td>
                <td>0.19</td>
                <td>
                  <italic>0.75</italic>
                </td>
                <td>0.86</td>
                <td>
                  <italic>0.99</italic>
                </td>
                <td>0.30</td>
                <td>
                  <italic>0.80</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>Method 2</td>
                <td>0.78</td>
                <td>
                  <italic>0.85</italic>
                </td>
                <td>0.68</td>
                <td>
                  <italic>0.88</italic>
                </td>
                <td>0.73</td>
                <td>
                  <italic>0.76</italic>
                </td>
                <td>0.77</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>TPR: true-positive rate.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>TNR: true-negative rate.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>NPV: negative predictive values.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>GM: geometric mean.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Ablation-Based Feature Significance</title>
        <p>While the selection of features was Gini-based, we wanted to back this selection based on classification performance and an ablation study [<xref ref-type="bibr" rid="ref48">48</xref>]. For each method, we compared a classifier based on the 7 features that are the union of the important features identified for both methods (past PE, age, BMI, pulmonary, thrombosis, anticoagulants, and sex), a classifier that is based on each method’s 6 selected features, and 6 classifiers in which each is missing 1 of the 6 important features of a method. We trained each classifier on 10 data permutations and examined statistical difference between the geometric mean values of the classifiers using the validation set and the Friedman-Nemenyi test for each method separately. We observed that only those classifiers missing past PE, pulmonary, or thrombosis were significantly worse in their geometric mean value than the full classifier (the one that included all 7 important features). We also observed that there was no significant difference between classifiers of the 2 methods. This analysis reinforces the results presented in the “Gini-Based Feature Significance” section about the significance of past PE, pulmonary, and thrombosis to PE identification.</p>
      </sec>
      <sec>
        <title>TPR-Driven Maximization</title>
        <p>To examine whether one method was superior to the other on the positive (PE) group when using the same patients with PE in training and testing, we compared the TPR values of the best models produced by the 2 methods using 10 data permutations and the univariate 2-tailed <italic>t</italic> test. We found that there was no significant difference between the 2 methods regarding the TPR results at the 95% confidence level.</p>
      </sec>
      <sec>
        <title>Identification of Potential Risk Factors</title>
        <p>To deepen our understanding of PE on hospital admission, we relaxed the categorization of the 19 indications into the 9 categories we created earlier (in the “Data Understanding and Preparation” section) to examine whether there were specific past diagnoses that had not been detected in the process performed so far, which may be considered as potential risk factors for PE. Simultaneously, we also attempted to find as homogeneous a group as possible of patients with PE. We hoped that such a group would be characterized differently than other groups, hinting at potential PE risk factors.</p>
        <p>First, we examined the prevalence of each of the past diagnoses existing in our data for each of the patients with PE and control patients. We found that the prevalence of several of these past diagnoses, such as past PE, pneumonia, and DVT, was higher in patients with PE than in control patients (<xref rid="figure3" ref-type="fig">Figure 3</xref>). On the other hand, the only past diagnosis having an incidence higher than 1% in control patients compared with patients with PE was ischemic heart disease.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Prevalence of an original past diagnosis or medication in each of the study populations. CHF: congestive heart failure; CKD: chronic kidney disease; COPD: chronic obstructive pulmonary disease; DVT: deep vein thrombosis; IBD: inflammatory bowel disease; IHD: ischemic heart disease; PE: pulmonary embolism.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e48595_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Next, to ensure our ability to find the most homogeneous group of patients with PE, we clustered, using the <italic>K</italic>-means algorithm, patient representations using different combinations of the features [<xref ref-type="bibr" rid="ref49">49</xref>]. The only features we chose to omit from the clustering process were those with a frequency below a 2% threshold in both populations: past surgery procedure, rheumatoid arthritis, inflammatory bowel disease, psychosis, and restrictive lung disease. Therefore, we were left with 18 possible features for patient representation: 3 demographic (age, sex, and BMI), 14 past diagnoses, and 1 chronic medication (anticoagulants). Because the prevalence of patients with PE in our data was very low (1942/46,639, 4.16%), we looked for a cluster or more where the percentage of patients with PE was significantly higher than this threshold.</p>
        <p>After simultaneously considering possible clustering schemes (<italic>K</italic>={2, 5}) and patient representations based on a combination of features, we found 2 clustering schemes that yielded interesting results. The first scheme (<xref rid="figure4" ref-type="fig">Figure 4</xref>), which yielded the cluster with the highest frequency of patients with PE (705/1120, 63%; cluster 4 in <xref ref-type="table" rid="table6">Table 6</xref>), is a 5-cluster scheme in which each patient is represented by sex, age, BMI, past PE, past pneumonia, history of atrial fibrillation or atrial flutter, past DVT, history of mental disorders, and history of coagulopathies. We see that cluster 4, which is the most PE-populated cluster of this scheme, represents a profile that is based on both demographic and medical history features, suggesting complex relations between such characteristics that are nontrivial and need further exploration. Cluster 4 contained 1120 patients (1120/46,639, 2.4% of all patients): 705 were patients with PE (705/1942, 36.3% of all patients with PE in our data) and only 415 were controls. Prominent characteristics of this cluster compared with the others are that all its patients had a past diagnosis of PE (see <xref ref-type="table" rid="table7">Table 7</xref>), a relatively high incidence of past DVT (12% compared with 2% in any of the other clusters) and past pneumonia (11% compared with 4% or less in any of the other clusters), and a moderate incidence of mental disorders, and they more often took anticoagulants. Patients in this cluster were equally mixed by sex. The other clusters had only a small incidence of such past diagnoses and were composed of single-sex patients (<xref ref-type="table" rid="table7">Table 7</xref>). 
That is, this clustering scheme made a clear distinction between a single small, but very PE-dominant, cluster 4, and 4 other non-PE clusters that are mainly dominated by demographics (sex and age) and BMI characteristics. Besides showing past PE and DVT as risk factors, which are known to clinicians (see, eg, the study by Scarvelis and Wells [<xref ref-type="bibr" rid="ref50">50</xref>]), patients in cluster 4 also reveal past pneumonia history as a factor, which is evidence of a new risk factor of PE.</p>
        <p>The second clustering scheme (<xref rid="figure5" ref-type="fig">Figure 5</xref>), which resulted in 2 clusters (clusters 2 and 4) with a prevalence of patients with PE of 61% (427/701 and 340/549) or more (<xref ref-type="table" rid="table8">Table 8</xref>), was a 5-cluster scheme in which patients were represented by sex, age, BMI, past PE, past pneumonia, history of mental disorders, and use of anticoagulants. All patients in these 2 clusters had a past diagnosis of PE and a higher incidence of past diagnosis of pneumonia and usage of anticoagulants (<xref ref-type="table" rid="table9">Table 9</xref>), higher BMI values, and, interestingly, they were divided into only women (cluster 2) or only men (cluster 4). Also, cluster 3, which has a relatively high frequency of patients with PE—20% (301/1470) compared with 4.16% in the full database (<xref ref-type="table" rid="table8">Table 8</xref>), is characterized by all patients having a past diagnosis of pneumonia. That is, the chances of developing PE according to this scheme are mainly related to either past PE and past pneumonia or, to a lesser degree, only past pneumonia. Although the incidence values of a past diagnosis of mental disorders and usage of anticoagulants were low, these 2 variables were chosen for patient representation by both clustering schemes, because their contribution to patient segregation was significant in maximizing the cluster PE frequency we optimized in the clustering. The atrial fibrillation or flutter diagnosis was selected only by the first clustering scheme, producing clusters with either all subjects or none of them with this diagnosis, as this partition helped maximize the target measure. Finally, in addition to past PE, which is arguably the most important variable for marking patients with suspected PE, past diagnoses of pneumonia and DVT may also lead to PE. Thus, we recommend increased attention to admitted patients with these past diagnoses.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Radar plot of characteristics of the first clustering scheme. A point of a particular cluster on the axis of any feature constitutes the average normalized value (between 0 and 1) for the patients in the cluster. DVT: deep vein thrombosis; PE: pulmonary embolism.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e48595_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>PE<sup>a</sup>-control prevalence in the clusters of the first clustering scheme (the highest PE prevalence for a cluster is italicized).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Cluster, n</td>
                <td>Relative size</td>
                <td colspan="2">Patients, n</td>
                <td>PE/total</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PE</td>
                <td>Control</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td>0.38</td>
                <td>564</td>
                <td>17,046</td>
                <td>0.03</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>0.47</td>
                <td>426</td>
                <td>21,395</td>
                <td>0.02</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>0.07</td>
                <td>106</td>
                <td>3179</td>
                <td>0.03</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>0.06</td>
                <td>141</td>
                <td>2662</td>
                <td>0.05</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>0.02</td>
                <td>705</td>
                <td>415</td>
                <td>
                  <italic>0.63</italic>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>PE: pulmonary embolism.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Average feature values in the clusters of the first clustering scheme.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="130"/>
            <col width="60"/>
            <col width="100"/>
            <col width="60"/>
            <col width="80"/>
            <col width="90"/>
            <col width="170"/>
            <col width="60"/>
            <col width="130"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Cluster, n</td>
                <td>Sex</td>
                <td>Age (years)</td>
                <td>BMI</td>
                <td>Past PE<sup>a</sup></td>
                <td>Pneumonia</td>
                <td>Atrial fibrillation/flutter</td>
                <td>DVT<sup>b</sup></td>
                <td>Mental disorders</td>
                <td>Coagulopathies</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td>1</td>
                <td>65.5</td>
                <td>26.5</td>
                <td>0</td>
                <td>0.03</td>
                <td>0</td>
                <td>0.02</td>
                <td>0.02</td>
                <td>0.02</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>0</td>
                <td>63.2</td>
                <td>26.8</td>
                <td>0</td>
                <td>0.03</td>
                <td>0</td>
                <td>0.02</td>
                <td>0.01</td>
                <td>0.01</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>0</td>
                <td>72.0</td>
                <td>27.7</td>
                <td>0.02</td>
                <td>0.04</td>
                <td>1</td>
                <td>0.02</td>
                <td>0.01</td>
                <td>0.02</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>1</td>
                <td>75.6</td>
                <td>27.9</td>
                <td>0.03</td>
                <td>0.04</td>
                <td>1</td>
                <td>0.02</td>
                <td>0.02</td>
                <td>0.02</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>0.56</td>
                <td>64.3</td>
                <td>27.6</td>
                <td>1</td>
                <td>0.11</td>
                <td>0</td>
                <td>0.12</td>
                <td>0.03</td>
                <td>0.04</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>PE: pulmonary embolism.</p>
            </fn>
            <fn id="table7fn2">
              <p><sup>b</sup>DVT: deep vein thrombosis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Radar plot of characteristics of the second clustering scheme. A point of a particular cluster on the axis of any feature constitutes the average normalized value (between 0 and 1) for the patients in the cluster. PE: pulmonary embolism.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e48595_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Descriptive statistics of the clusters of the second clustering scheme<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="200"/>
            <col width="130"/>
            <col width="160"/>
            <col width="330"/>
            <thead>
              <tr valign="top">
                <td>Cluster, n</td>
                <td>Relative size</td>
                <td colspan="2">Patients, n</td>
                <td>PE/total</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PE<sup>b</sup></td>
                <td>Control</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td>0.42</td>
                <td>510</td>
                <td>19,161</td>
                <td>0.03</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>0.52</td>
                <td>364</td>
                <td>23,884</td>
                <td>0.02</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>0.02</td>
                <td>427</td>
                <td>274</td>
                <td>
                  <italic>0.61</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>0.03</td>
                <td>301</td>
                <td>1169</td>
                <td>0.20</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>0.01</td>
                <td>340</td>
                <td>209</td>
                <td>
                  <italic>0.62</italic>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table8fn1">
              <p><sup>a</sup>Italicized values indicate the high prevalence of patients with pulmonary embolism in clusters 2 and 4 compared with the prevalence of 4.16% of these patients in the cohort.</p>
            </fn>
            <fn id="table8fn2">
              <p><sup>b</sup>PE: pulmonary embolism.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Average value of each feature in each cluster of the second clustering scheme<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="80"/>
            <col width="110"/>
            <col width="80"/>
            <col width="90"/>
            <col width="110"/>
            <col width="140"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Cluster, n</td>
                <td>Sex</td>
                <td>Age (years)</td>
                <td>BMI</td>
                <td>Past PE<sup>b</sup></td>
                <td>Pneumonia</td>
                <td>Mental disorders</td>
                <td>Coagulopathies</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td>1</td>
                <td>66.7</td>
                <td>26.7</td>
                <td>0</td>
                <td>0</td>
                <td>0.02</td>
                <td>0.02</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>0</td>
                <td>64.2</td>
                <td>27.0</td>
                <td>0</td>
                <td>0</td>
                <td>0.01</td>
                <td>0.01</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>
                  <italic>1</italic>
                </td>
                <td>67.3</td>
                <td>27.8</td>
                <td>
                  <italic>1</italic>
                </td>
                <td>0.12</td>
                <td>0.04</td>
                <td>0.04</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>0.45</td>
                <td>69.3</td>
                <td>26.5</td>
                <td>0</td>
                <td>
                  <italic>1</italic>
                </td>
                <td>0.03</td>
                <td>0.03</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>
                  <italic>0</italic>
                </td>
                <td>63.0</td>
                <td>27.5</td>
                <td>
                  <italic>1</italic>
                </td>
                <td>0.11</td>
                <td>0.02</td>
                <td>0.04</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table9fn1">
              <p><sup>a</sup>All patients of clusters 2 and 4 (see <xref ref-type="table" rid="table8">Table 8</xref>), clusters that are mostly populated by patients with PE, have a past diagnosis of pulmonary embolism and higher incidence of past diagnosis of pneumonia and usage of anticoagulants. While cluster 2 is composed of only relatively old women, cluster 4 includes only relatively young men (italicized values).</p>
            </fn>
            <fn id="table9fn2">
              <p><sup>b</sup>PE: pulmonary embolism.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>We are not the first to address the potential benefits of applying AI to PE, as AI does not require additional clinician input or cause workflow disruption by automatically screening a broad inpatient population [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. These studies reported the application of AI in interpreting chest images [<xref ref-type="bibr" rid="ref14">14</xref>] or their accompanying radiology reports [<xref ref-type="bibr" rid="ref51">51</xref>] and in using laboratory test results and vital signs [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] of patients already diagnosed with PE. In addition, these studies often targeted PE in patients who are already at elevated risk of PE [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>To better understand and predict the occurrence and diagnosis of PE, we have suggested tools that may be clinically available on patient hospital admission—the earliest point in time to model PE in the hospital. Also, we addressed the entire population of patients presenting to a tertiary hospital, without narrowing them to only those with high probability of PE, as required by the Wells or revised Geneva scores, to screen the full admitted population and not only those with higher risk of PE. Using 13 years of data from around 50,000 patients with PE and those with no PE, we considered the medical history of patients presenting to a hospital, where their prior chance of being diagnosed with PE was 4%. To create reliable models, we suggested 2 methods to address this highly imbalanced clinical scenario. The first empirically sets a classifier decision threshold to account for the minority-to-majority ratio in the imbalanced data, and the second uses an ensemble of balanced classifiers evaluating the PE class versus equal-sized disjoint portions of the imbalanced non-PE class. To identify significant predictive past diagnoses, we considered diagnoses in 2 resolutions, separately and categorized, and proposed a nonparametric statistical test to consolidate feature importance lists obtained over data permutations. An ablation study validated results based on the test. Lists produced by the 2 methods were almost identical, and classifiers trained using them provided similar performance, validating the correctness of both methods. Past diagnoses of PE, pulmonary diseases, and thrombosis, together with age, sex, BMI, and usage of anticoagulants contributed to 80% accuracy in early identification of both patients with PE and those with no PE, with no statistical difference between the methods.</p>
      <p>Although only 4% of the patients in our data set had a definitive diagnosis of PE, we identified, by joint optimization of feature representation and a clustering scheme, subgroups comprising more than 60% PE-positive patients. These subgroups facilitated a more in-depth analysis to identify demographic and past diagnosis characteristics, demographic-clinical relations, and potential PE risk factors. Some of our findings are not conventionally considered. For example, in medical practice, a past diagnosis of pulmonary disease on admission is related to a chronic respiratory disease displaying respiratory symptoms, but we show here that this may also be an indication of PE. This is an important clinical pearl that should prompt clinicians worldwide, with or without access to AI predictive algorithms, to consider PE in such patients.</p>
      <p>In summary, despite the high imbalance in the data and the very early stage of modeling, our prediction and clustering models were both accurate and informative in identifying patients at high risk for PE, at the time of hospital admission, before a patient is even seen by a hospital doctor. The main advantage of our findings, which is relevant to every health care professional, is the fact that applying AI enables whole-patient-population analysis without the need to adhere to previously published criteria (eg, Wells or revised Geneva scores), which are not sensitive enough [<xref ref-type="bibr" rid="ref52">52</xref>], or to limit its use to high-risk patients. In the case of PE, this approach defines previous pulmonary disease, whether with past PE or alone, as a new, significant risk factor to consider.</p>
      <p>A limitation of our research is that we worked with data that, due to privacy restrictions, were synthesized from real data. The difference between real data and synthetic data derived from real data is that the synthetic data include times of diagnosis and measurements that are randomly shifted by up to a year before or after the real time and thus could not be verified. Therefore, in our study, the existence of past diagnoses was expressed using binary variables (whether a diagnosis exists or not) and not by the times of diagnosis, which is vital information that could have contributed greatly to the models.</p>
      <p>Our future research will use real data and also assimilate data from the emergency department (validating and amending, as necessary, the admission model) and then the internal ward, whenever data are collected, which may necessitate the use of dynamic modeling. In addition, while our second method was applied to a balanced test set, it should be further examined on a nonbalanced database that reflects the true PE prevalence. Finally, beyond being the basis for predictive and informative models of PE, we see our workflow as suitable for other diseases, especially since in almost all of them, an imbalanced scenario is involved.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Methods developed for imbalanced data.</p>
        <media xlink:href="jmir_v26i1e48595_app1.docx" xlink:title="DOCX File, 166 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>A statistical test to consolidate different recommendations on feature importance.</p>
        <media xlink:href="jmir_v26i1e48595_app2.docx" xlink:title="DOCX File, 14 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CTA</term>
          <def>
            <p>computed tomographic angiography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">DVT</term>
          <def>
            <p>deep vein thrombosis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PE</term>
          <def>
            <p>pulmonary embolism</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SMC</term>
          <def>
            <p>Sheba Medical Center</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">TPR</term>
          <def>
            <p>true-positive rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">VTE</term>
          <def>
            <p>venous thromboembolism</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldhaber</surname>
              <given-names>SZ</given-names>
            </name>
            <name name-style="western">
              <surname>Bounameaux</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Pulmonary embolism and deep vein thrombosis</article-title>
          <source>Lancet</source>
          <year>2012</year>
          <volume>379</volume>
          <issue>9828</issue>
          <fpage>1835</fpage>
          <lpage>1846</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(11)61904-1</pub-id>
          <pub-id pub-id-type="medline">22494827</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(11)61904-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruppert</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Steinle</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lees</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Economic burden of venous thromboembolism: a systematic review</article-title>
          <source>J Med Econ</source>
          <year>2011</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>65</fpage>
          <lpage>74</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/full/10.3111/13696998.2010.546465"/>
          </comment>
          <pub-id pub-id-type="doi">10.3111/13696998.2010.546465</pub-id>
          <pub-id pub-id-type="medline">21222564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grosse</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Nyarko</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Raskob</surname>
              <given-names>GE</given-names>
            </name>
          </person-group>
          <article-title>The economic burden of incident venous thromboembolism in the United States: a review of estimated attributable healthcare costs</article-title>
          <source>Thromb Res</source>
          <year>2016</year>
          <volume>137</volume>
          <fpage>3</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26654719"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.thromres.2015.11.033</pub-id>
          <pub-id pub-id-type="medline">26654719</pub-id>
          <pub-id pub-id-type="pii">S0049-3848(15)30209-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4706477</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gartner</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kolisch</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Neill</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Padman</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Machine learning approaches for early DRG classification and resource allocation</article-title>
          <source>INFORMS J Comput</source>
          <year>2015</year>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>597</fpage>
          <lpage>808</lpage>
          <pub-id pub-id-type="doi">10.1287/ijoc.2015.0655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vacca</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Jehle</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Acute pulmonary embolism</article-title>
          <source>Nursing</source>
          <year>2013</year>
          <volume>43</volume>
          <issue>3</issue>
          <fpage>25</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1470-2118(24)01179-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/01.NURSE.0000427353.73708.9d</pub-id>
          <pub-id pub-id-type="medline">23411548</pub-id>
          <pub-id pub-id-type="pii">00152193-201303000-00009</pub-id>
          <pub-id pub-id-type="pmcid">PMC6542219</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morrone</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Morrone</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Erratum: acute pulmonary embolism: focus on the clinical picture</article-title>
          <source>Korean Circ J</source>
          <year>2018</year>
          <volume>48</volume>
          <issue>5</issue>
          <fpage>365</fpage>
          <lpage>381</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29968441"/>
          </comment>
          <pub-id pub-id-type="doi">10.4070/kcj.2017.0998</pub-id>
          <pub-id pub-id-type="medline">29968441</pub-id>
          <pub-id pub-id-type="pii">48.661</pub-id>
          <pub-id pub-id-type="pmcid">PMC6031720</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Comparison of the Wells score with the revised Geneva score for assessing pretest probability of pulmonary embolism in hospitalized elderly patients</article-title>
          <source>Eur J Intern Med</source>
          <year>2016</year>
          <volume>36</volume>
          <fpage>e18</fpage>
          <lpage>e19</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ejim.2016.09.003</pub-id>
          <pub-id pub-id-type="medline">27650507</pub-id>
          <pub-id pub-id-type="pii">S0953-6205(16)30304-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Ramaseshan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mendelson</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Comparison of the Wells and revised Geneva scores for the diagnosis of pulmonary embolism: an Australian experience</article-title>
          <source>Intern Med J</source>
          <year>2011</year>
          <volume>41</volume>
          <issue>3</issue>
          <fpage>258</fpage>
          <lpage>263</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1445-5994.2010.02204.x</pub-id>
          <pub-id pub-id-type="medline">20214691</pub-id>
          <pub-id pub-id-type="pii">IMJ2204</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le Gal</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Righini</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Sanchez</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Aujesky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bounameaux</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Perrier</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Prediction of pulmonary embolism in the emergency department: the revised Geneva score</article-title>
          <source>Ann Intern Med</source>
          <year>2006</year>
          <volume>144</volume>
          <issue>3</issue>
          <fpage>165</fpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-144-3-200602070-00004</pub-id>
          <pub-id pub-id-type="medline">16461960</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ceriani</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Combescure</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Le Gal</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nendaz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perneger</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bounameaux</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Perrier</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Righini</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Clinical prediction rules for pulmonary embolism: a systematic review and meta-analysis</article-title>
          <source>J Thromb Haemost</source>
          <year>2010</year>
          <volume>8</volume>
          <issue>5</issue>
          <fpage>957</fpage>
          <lpage>970</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1538-7836(22)12404-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1538-7836.2010.03801.x</pub-id>
          <pub-id pub-id-type="medline">20149072</pub-id>
          <pub-id pub-id-type="pii">S1538-7836(22)12404-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Es</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Takada</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kraaijpoel</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Klok</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Stals</surname>
              <given-names>MAM</given-names>
            </name>
            <name name-style="western">
              <surname>Büller</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Courtney</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Freund</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Galipienzo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Le Gal</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ghanima</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Huisman</surname>
              <given-names>MV</given-names>
            </name>
            <name name-style="western">
              <surname>Kline</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>KGM</given-names>
            </name>
            <name name-style="western">
              <surname>Parpia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Perrier</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Righini</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Robert-Ebadi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Wells</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>de Wit</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van Smeden</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Geersing</surname>
              <given-names>GJ</given-names>
            </name>
          </person-group>
          <article-title>Diagnostic management of acute pulmonary embolism: a prediction model based on a patient data meta-analysis</article-title>
          <source>Eur Heart J</source>
          <year>2023</year>
          <volume>44</volume>
          <issue>32</issue>
          <fpage>3073</fpage>
          <lpage>3081</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37452732"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/eurheartj/ehad417</pub-id>
          <pub-id pub-id-type="medline">37452732</pub-id>
          <pub-id pub-id-type="pii">7224694</pub-id>
          <pub-id pub-id-type="pmcid">PMC10917087</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miniati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bottai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Monti</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Salvadori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Serasini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Passera</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Simple and accurate prediction of the clinical probability of pulmonary embolism</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2008</year>
          <volume>178</volume>
          <issue>3</issue>
          <fpage>290</fpage>
          <lpage>294</lpage>
          <pub-id pub-id-type="doi">10.1164/rccm.200802-207OC</pub-id>
          <pub-id pub-id-type="medline">18436792</pub-id>
          <pub-id pub-id-type="pii">200802-207OC</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wicki</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Perrier</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Perneger</surname>
              <given-names>TV</given-names>
            </name>
            <name name-style="western">
              <surname>Bounameaux</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Junod</surname>
              <given-names>AF</given-names>
            </name>
          </person-group>
          <article-title>Predicting adverse outcome in patients with acute pulmonary embolism: a risk score</article-title>
          <source>Thromb Haemost</source>
          <year>2000</year>
          <volume>84</volume>
          <issue>4</issue>
          <fpage>548</fpage>
          <lpage>552</lpage>
          <pub-id pub-id-type="medline">11057848</pub-id>
          <pub-id pub-id-type="pii">00100548</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Banerjee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sofela</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mushlin</surname>
              <given-names>AI</given-names>
            </name>
            <name name-style="western">
              <surname>Desai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bledsoe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Amrhein</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Zamanian</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lungren</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Development and performance of the pulmonary embolism Result Forecast Model (PERFORM) for computed tomography clinical decision support</article-title>
          <source>JAMA Netw Open</source>
          <year>2019</year>
          <volume>2</volume>
          <issue>8</issue>
          <fpage>e198719</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31390040"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.8719</pub-id>
          <pub-id pub-id-type="medline">31390040</pub-id>
          <pub-id pub-id-type="pii">2747483</pub-id>
          <pub-id pub-id-type="pmcid">PMC6686780</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rucco</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sousa-Rodrigues</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Merelli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Falsetti</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nitti</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Salvi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Neural hypernetwork approach for pulmonary embolism diagnosis</article-title>
          <source>BMC Res Notes</source>
          <year>2015</year>
          <volume>8</volume>
          <fpage>617</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcresnotes.biomedcentral.com/articles/10.1186/s13104-015-1554-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13104-015-1554-5</pub-id>
          <pub-id pub-id-type="medline">26515513</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13104-015-1554-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC4627406</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>QY</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>QF</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>LF</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>LY</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>FH</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Clinical symptoms and related risk factors in pulmonary embolism patients and cluster analysis based on these symptoms</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>14887</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-017-14888-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-017-14888-7</pub-id>
          <pub-id pub-id-type="medline">29097743</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-017-14888-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5668424</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Maharjan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mataraso</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Calvert</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Predicting pulmonary embolism among hospitalized patients with machine learning algorithms</article-title>
          <source>Pulm Circ</source>
          <year>2022</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>e12013</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35506114"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/pul2.12013</pub-id>
          <pub-id pub-id-type="medline">35506114</pub-id>
          <pub-id pub-id-type="pii">PUL212013</pub-id>
          <pub-id pub-id-type="pmcid">PMC9052977</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Casie Chetty</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shokouhi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Maharjan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chuba</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Calvert</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Massive external validation of a machine learning algorithm to predict pulmonary embolism in hospitalized patients</article-title>
          <source>Thromb Res</source>
          <year>2022</year>
          <volume>216</volume>
          <fpage>14</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0049-3848(22)00290-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.thromres.2022.05.016</pub-id>
          <pub-id pub-id-type="medline">35679633</pub-id>
          <pub-id pub-id-type="pii">S0049-3848(22)00290-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pirbhulal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>De Albuquerque</surname>
              <given-names>VHC</given-names>
            </name>
          </person-group>
          <article-title>Active balancing mechanism for imbalanced medical data in deep learning–based classification models</article-title>
          <source>ACM Trans Multimedia Comput Commun Appl</source>
          <year>2020</year>
          <volume>16</volume>
          <issue>1s</issue>
          <fpage>1</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.1145/3357253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Imbalance-XGBoost: leveraging weighted and focal losses for binary label-imbalanced classification with XGBoost</article-title>
          <source>Pattern Recognit Lett</source>
          <year>2020</year>
          <volume>136</volume>
          <fpage>190</fpage>
          <lpage>197</lpage>
          <pub-id pub-id-type="doi">10.1016/j.patrec.2020.05.035</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kvamme</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Borgan</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Scheel</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Time-to-event prediction with neural networks and Cox regression</article-title>
          <source>J Mach Learn Res</source>
          <year>2019</year>
          <volume>20</volume>
          <fpage>1</fpage>
          <lpage>30</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernandez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Herrera</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
          </person-group>
          <article-title>SMOTE for learning from imbalanced data: progress and challenges, marking the 15-year anniversary</article-title>
          <source>J Artif Intell Res</source>
          <year>2018</year>
          <volume>61</volume>
          <fpage>863</fpage>
          <lpage>905</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.1.11192</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yeshaya</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Koushnir</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>On the classification of a small imbalanced cytogenetic image database</article-title>
          <source>IEEE/ACM Trans Comput Biol Bioinform</source>
          <year>2007</year>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>204</fpage>
          <lpage>215</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://core.ac.uk/reader/186701161?utm_source=linkout"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/TCBB.2007.070207</pub-id>
          <pub-id pub-id-type="medline">17473314</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Halbersberg</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wienreb</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Joint maximization of accuracy and information for learning the structure of a Bayesian network classifier</article-title>
          <source>Mach Learn</source>
          <year>2020</year>
          <volume>109</volume>
          <fpage>1039</fpage>
          <lpage>1099</lpage>
          <pub-id pub-id-type="doi">10.1007/s10994-020-05869-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Halbersberg</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Young driver fatal motorcycle accident analysis by jointly maximizing accuracy and information</article-title>
          <source>Accid Anal Prev</source>
          <year>2019</year>
          <volume>129</volume>
          <fpage>350</fpage>
          <lpage>361</lpage>
          <pub-id pub-id-type="doi">10.1016/j.aap.2019.04.016</pub-id>
          <pub-id pub-id-type="medline">31201968</pub-id>
          <pub-id pub-id-type="pii">S0001-4575(18)30456-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaur</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pannu</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Malhi</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>A systematic review on imbalanced data challenges in machine learning</article-title>
          <source>ACM Comput Surv</source>
          <year>2019</year>
          <volume>52</volume>
          <issue>4</issue>
          <fpage>1</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1145/3343440</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mandrekar</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>Receiver operating characteristic curve in diagnostic test assessment</article-title>
          <source>J Thorac Oncol</source>
          <year>2010</year>
          <volume>5</volume>
          <issue>9</issue>
          <fpage>1315</fpage>
          <lpage>1316</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1556-0864(15)30604-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/JTO.0b013e3181ec173d</pub-id>
          <pub-id pub-id-type="medline">20736804</pub-id>
          <pub-id pub-id-type="pii">S1556-0864(15)30604-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lobo</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Jiménez-Valverde</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Real</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>AUC: a misleading measure of the performance of predictive distribution models</article-title>
          <source>Glob Ecol Biogeogr</source>
          <year>2007</year>
          <volume>17</volume>
          <fpage>145</fpage>
          <lpage>151</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1466-8238.2007.00358.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuncheva</surname>
              <given-names>LI</given-names>
            </name>
            <name name-style="western">
              <surname>Arnaiz-González</surname>
              <given-names>Á</given-names>
            </name>
            <name name-style="western">
              <surname>Díez-Pastor</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Gunn</surname>
              <given-names>IAD</given-names>
            </name>
          </person-group>
          <article-title>Instance selection improves geometric mean accuracy: a study on imbalanced data classification</article-title>
          <source>Prog Artif Intell</source>
          <year>2019</year>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>215</fpage>
          <lpage>228</lpage>
          <pub-id pub-id-type="doi">10.1007/s13748-019-00172-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jadhav</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>A novel weighted TPR-TNR measure to assess performance of the classifiers</article-title>
          <source>Expert Syst Appl</source>
          <year>2020</year>
          <volume>152</volume>
          <fpage>113391</fpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2020.113391</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>On the performance of Matthews correlation coefficient (MCC) for imbalanced dataset</article-title>
          <source>Pattern Recognit Lett</source>
          <year>2020</year>
          <volume>136</volume>
          <fpage>71</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1016/j.patrec.2020.03.030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Provost</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Machine learning from imbalanced data sets 101</article-title>
          <source>Proc. AAAI-2000 Workshop on Imbalanced Data Sets</source>
          <year>2000</year>
          <volume>Online</volume>
          <fpage>3</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cdn.aaai.org/Workshops/2000/WS-00-05/WS00-05-001.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Seo</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Active downsampling for binary classification with an imbalanced dataset</article-title>
          <year>2021</year>
          <conf-name>Proceedings of the IISE Annual Conference</conf-name>
          <conf-date>2021</conf-date>
          <conf-loc>Columbia, MO</conf-loc>
          <fpage>187</fpage>
          <lpage>192</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goutte</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gaussier</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A probabilistic interpretation of precision, recall and F-score, with implication for evaluation</article-title>
          <source>Lect Notes Comput Sci</source>
          <year>2005</year>
          <volume>3408</volume>
          <fpage>345</fpage>
          <lpage>359</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-540-31865-1_25</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>MDClone</collab>
          </person-group>
          <source>The world's most powerful healthcare data platform</source>
          <access-date>2024-03-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdclone.com/">https://www.mdclone.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Foraker</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michelson</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Pineda Soto</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Colvin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Loh</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kollef</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Maddox</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Evanoff</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Dror</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zamstein</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Payne</surname>
              <given-names>PRO</given-names>
            </name>
          </person-group>
          <article-title>Spot the difference: comparing results of analyses from real patient data and synthetic derivatives</article-title>
          <source>JAMIA Open</source>
          <year>2020</year>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>557</fpage>
          <lpage>566</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33623891"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooaa060</pub-id>
          <pub-id pub-id-type="medline">33623891</pub-id>
          <pub-id pub-id-type="pii">ooaa060</pub-id>
          <pub-id pub-id-type="pmcid">PMC7886551</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reiner Benaim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Almog</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gorelik</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hochberg</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nassar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mashiach</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Khamaisi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lurie</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Azzam</surname>
              <given-names>ZS</given-names>
            </name>
            <name name-style="western">
              <surname>Khoury</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kurnik</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Beyar</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Analyzing medical research results based on synthetic data and their relation to real data results: systematic comparison from five observational studies</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>e16492</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/2/e16492/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16492</pub-id>
          <pub-id pub-id-type="medline">32130148</pub-id>
          <pub-id pub-id-type="pii">v8i2e16492</pub-id>
          <pub-id pub-id-type="pmcid">PMC7059086</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mortazavi-Asl</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Dayal</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>PrefixSpan: mining sequential patterns efficiently by prefix-projected pattern growth</article-title>
          <year>2001</year>
          <conf-name>International Conference on Data Engineering</conf-name>
          <conf-date>April 2-6, 2001</conf-date>
          <conf-loc>Heidelberg, Germany</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icde.2001.914830</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Menze</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Kelm</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Masuch</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Himmelreich</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Bachert</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Petrich</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hamprecht</surname>
              <given-names>FA</given-names>
            </name>
          </person-group>
          <article-title>A comparison of random forest and its Gini importance with standard chemometric methods for the feature selection and classification of spectral data</article-title>
          <source>BMC Bioinformatics</source>
          <year>2009</year>
          <volume>10</volume>
          <fpage>213</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-10-213"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-10-213</pub-id>
          <pub-id pub-id-type="medline">19591666</pub-id>
          <pub-id pub-id-type="pii">1471-2105-10-213</pub-id>
          <pub-id pub-id-type="pmcid">PMC2724423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Holmes</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Benchmarking attribute selection techniques for discrete class data mining</article-title>
          <source>IEEE Trans Knowledge Data Eng</source>
          <year>2003</year>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>1437</fpage>
          <lpage>1447</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2003.1245283</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Insights into amyotrophic lateral sclerosis from a machine learning perspective</article-title>
          <source>J Clin Med</source>
          <year>2019</year>
          <volume>8</volume>
          <issue>10</issue>
          <fpage>1578</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jcm8101578"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jcm8101578</pub-id>
          <pub-id pub-id-type="medline">31581566</pub-id>
          <pub-id pub-id-type="pii">jcm8101578</pub-id>
          <pub-id pub-id-type="pmcid">PMC6832919</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hever</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Matot</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bitan</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Machine learning applied to multi-sensor information to reduce false alarm rate in the ICU</article-title>
          <source>J Clin Monit Comput</source>
          <year>2020</year>
          <volume>34</volume>
          <issue>2</issue>
          <fpage>339</fpage>
          <lpage>352</lpage>
          <pub-id pub-id-type="doi">10.1007/s10877-019-00307-x</pub-id>
          <pub-id pub-id-type="medline">30955160</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10877-019-00307-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kashi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Polak</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rokach</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Levy-Tzedek</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A machine-learning model for automatic detection of movement compensations in stroke patients</article-title>
          <source>IEEE Trans Emerg Topics Comput</source>
          <year>2021</year>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>1234</fpage>
          <lpage>1247</lpage>
          <pub-id pub-id-type="doi">10.1109/tetc.2020.2988945</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Avisar</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Guardia-Laguarta</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Area-Gomez</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Surface</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Alcalay</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Lipidomics prediction of Parkinson's disease severity: a machine-learning analysis</article-title>
          <source>J Parkinsons Dis</source>
          <year>2021</year>
          <volume>11</volume>
          <issue>3</issue>
          <fpage>1141</fpage>
          <lpage>1155</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33814463"/>
          </comment>
          <pub-id pub-id-type="doi">10.3233/JPD-202476</pub-id>
          <pub-id pub-id-type="medline">33814463</pub-id>
          <pub-id pub-id-type="pii">JPD202476</pub-id>
          <pub-id pub-id-type="pmcid">PMC8355022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Avisar</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Guardia-Laguarta</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Surface</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Papagiannakis</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Maniati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Antonellou</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Papadimitriou</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Koros</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Athanassiadou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Przedborski</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Stefanis</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Area-Gomez</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Alcalay</surname>
              <given-names>RN</given-names>
            </name>
          </person-group>
          <article-title>Lipid level alteration in human and cellular models of alpha synuclein mutations</article-title>
          <source>NPJ Parkinsons Dis</source>
          <year>2022</year>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>52</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41531-022-00313-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41531-022-00313-y</pub-id>
          <pub-id pub-id-type="medline">35468903</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41531-022-00313-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC9039073</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demsar</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Statistical comparisons of classifiers over multiple data sets</article-title>
          <source>J Mach Learn Res</source>
          <year>2006</year>
          <volume>7</volume>
          <fpage>1</fpage>
          <lpage>30</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yildiz</surname>
              <given-names>OT</given-names>
            </name>
            <name name-style="western">
              <surname>Aslan</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Alpaydin</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Multivariate statistical tests for comparing classification algorithms</article-title>
          <year>2011</year>
          <conf-name>International Conference on Learning and Intelligent Optimization</conf-name>
          <conf-date>2011</conf-date>
          <conf-loc>Berlin, Heidelberg</conf-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>1</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/978-3-642-25566-3_1"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>MZ</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>A random forest based predictor for medical data classification using feature ranking</article-title>
          <source>Informatics Med Unlocked</source>
          <year>2019</year>
          <volume>15</volume>
          <fpage>100180</fpage>
          <pub-id pub-id-type="doi">10.1016/j.imu.2019.100180</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinaga</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Unsupervised K-Means clustering algorithm</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>80716</fpage>
          <lpage>80727</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.2988796</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scarvelis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wells</surname>
              <given-names>PS</given-names>
            </name>
          </person-group>
          <article-title>Diagnosis and treatment of deep-vein thrombosis</article-title>
          <source>CMAJ</source>
          <year>2006</year>
          <volume>175</volume>
          <issue>9</issue>
          <fpage>1087</fpage>
          <lpage>1092</lpage>
          <pub-id pub-id-type="doi">10.1503/cmaj.060366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Banerjee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Moradzadeh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Amrhein</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mong</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Farri</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lungren</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Comparative effectiveness of convolutional neural network (CNN) and recurrent neural network (RNN) architectures for radiology text report classification</article-title>
          <source>Artif Intell Med</source>
          <year>2019</year>
          <volume>97</volume>
          <fpage>79</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30477892"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2018.11.004</pub-id>
          <pub-id pub-id-type="medline">30477892</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(17)30625-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6533167</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>HL</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>BF</given-names>
            </name>
          </person-group>
          <article-title>Comparison of the Wells score with the revised Geneva score for assessing suspected pulmonary embolism: a systematic review and meta-analysis</article-title>
          <source>J Thromb Thrombolysis</source>
          <year>2016</year>
          <volume>41</volume>
          <issue>3</issue>
          <fpage>482</fpage>
          <lpage>492</lpage>
          <pub-id pub-id-type="doi">10.1007/s11239-015-1250-2</pub-id>
          <pub-id pub-id-type="medline">26178041</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11239-015-1250-2</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
