<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i3e22951</article-id>
      <article-id pub-id-type="pmid">33683212</article-id>
      <article-id pub-id-type="doi">10.2196/22951</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Natural Language Processing and Machine Learning for Identifying Incident Stroke From Electronic Health Records: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Colak</surname>
            <given-names>Cemil</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kaya</surname>
            <given-names>M. Onur</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Yiqing</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2874-8136</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Fu</surname>
            <given-names>Sunyang</given-names>
          </name>
          <degrees>MHI</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1691-5179</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Bielinski</surname>
            <given-names>Suzette J</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2905-5430</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Decker</surname>
            <given-names>Paul A</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3756-4227</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Chamberlain</surname>
            <given-names>Alanna M</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1888-9584</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Roger</surname>
            <given-names>Veronique L</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9347-7865</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Hongfang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2570-3741</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Larson</surname>
            <given-names>Nicholas B</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Health Sciences Research</institution>
            <institution>Mayo Clinic</institution>
            <addr-line>205 3rd Ave SW</addr-line>
            <addr-line>Rochester, MN, 55905</addr-line>
            <country>United States</country>
            <phone>1 507 293 1700</phone>
            <email>Larson.Nicholas@mayo.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3468-4215</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Health Sciences Research</institution>
        <institution>Mayo Clinic</institution>
        <addr-line>Rochester, MN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Nicholas B Larson <email>Larson.Nicholas@mayo.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>3</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>8</day>
        <month>3</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>3</issue>
      <elocation-id>e22951</elocation-id>
      <history>
        <date date-type="received">
          <day>27</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>8</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>25</day>
          <month>8</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>20</day>
          <month>1</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Yiqing Zhao, Sunyang Fu, Suzette J Bielinski, Paul A Decker, Alanna M Chamberlain, Veronique L Roger, Hongfang Liu, Nicholas B Larson. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 08.03.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/3/e22951" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Stroke is an important clinical outcome in cardiovascular research. However, the ascertainment of incident stroke is typically accomplished via time-consuming manual chart abstraction. Current phenotyping efforts using electronic health records for stroke focus on case ascertainment rather than incident disease, which requires knowledge of the temporal sequence of events.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to develop a machine learning–based phenotyping algorithm for incident stroke ascertainment based on diagnosis codes, procedure codes, and clinical concepts extracted from clinical notes using natural language processing.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The algorithm was trained and validated using an existing epidemiology cohort consisting of 4914 patients with atrial fibrillation (AF) with manually curated incident stroke events. Various combinations of feature sets and machine learning classifiers were compared. Using a heuristic rule based on the composition of concepts and codes, we further detected the stroke subtype (ischemic stroke/transient ischemic attack or hemorrhagic stroke) of each identified stroke. The algorithm was further validated using a cohort (n=150) selected by stratified sampling from a population in Olmsted County, Minnesota (N=74,314).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Among the 4914 patients with AF, 740 had validated incident stroke events. The best-performing stroke phenotyping algorithm used clinical concepts, diagnosis codes, and procedure codes as features in a random forest classifier. Among patients with stroke codes in the general population sample, the best-performing model achieved a positive predictive value of 86% (43/50; 95% CI 0.74-0.93) and a negative predictive value of 96% (96/100). For subtype identification, we achieved an accuracy of 83% in the AF cohort and 80% in the general population sample.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We developed and validated a machine learning–based algorithm that performed well for identifying incident stroke and for determining type of stroke. The algorithm also performed well on a sample from a general population, further demonstrating its generalizability and potential for adoption by other institutions.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>stroke</kwd>
        <kwd>natural language processing</kwd>
        <kwd>electronic health records</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Stroke is a syndrome involving a rapid loss of cerebral function with vascular origin [<xref ref-type="bibr" rid="ref1">1</xref>]. The loss of function can result in deep coma or subarachnoid hemorrhage. There are two broad categories of stroke: hemorrhagic and ischemic stroke [<xref ref-type="bibr" rid="ref2">2</xref>]. Hemorrhage is caused by bleeding within the skull cavity, while ischemia is characterized by inadequate blood to supply a part of the brain. Stroke identification is an important outcome for various cardiovascular studies [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. However, a challenge with stroke ascertainment is the inconsistent use of International Classification of Diseases (ICD) codes [<xref ref-type="bibr" rid="ref6">6</xref>], which may result in inaccurate code-based ascertainment of cases [<xref ref-type="bibr" rid="ref7">7</xref>]. Therefore, the time-consuming process of electronic health record (EHR) abstraction remains the gold standard of stroke ascertainment [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>Machine learning has recently gained popularity for its ability to classify patients or make predictions on various aspects of diseases. In contrast to manually curated algorithms based on domain expertise, machine learning is a data-driven approach that can be trained on large data sets to identify and leverage complex feature relationships and thereby improve classification and prediction tasks. In terms of stroke, machine learning algorithms have been applied to predict future stroke cases [<xref ref-type="bibr" rid="ref10">10</xref>], mortality and recurrent strokes [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>], and treatment outcomes [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Most existing phenotyping algorithms have been developed to only differentiate between cases and noncases of diseases [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]; however, ascertaining incident disease (ie, first occurrence of disease) in a population is a more difficult task [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. A recent study by Ni et al [<xref ref-type="bibr" rid="ref21">21</xref>] examined potential predictive features of stroke occurrence including demographic, clinical, and diagnostic characteristics of patients. The authors found that diagnostic tests for stroke, such as computed tomography (CT) and magnetic resonance imaging (MRI), contributed to most of the model performance, and that the optimal feature set included imaging findings, signs and symptoms, interventions, emergency department assessments, findings from angiography and carotid ultrasound tests, ICD codes, substance use (smoking, alcohol, and street drugs) characteristics, and demographics. However, features such as signs and symptoms, substance use characteristics, and demographics may not be specific enough for disease ascertainment, as there is a high prevalence of strokelike symptoms among people without a diagnosis of stroke [<xref ref-type="bibr" rid="ref22">22</xref>]. In addition, incorporating too many features in the model may result in overfitting without appropriate regularization. Another study [<xref ref-type="bibr" rid="ref7">7</xref>] also used ICD and Current Procedural Terminology (CPT) [<xref ref-type="bibr" rid="ref23">23</xref>] codes as features to classify positive, possible, and negative stroke cases. However, stroke-related clinical concepts (including both disease name concepts and symptom concepts) in unstructured clinical notes were not included in this model.</p>
      <p>Rapid adoption of EHRs has enabled secondary use of the EHR data in epidemiological research [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. Previous studies noted the existence of bias using a single type of EHR data (ie, diagnosis codes) [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. To avoid this bias, the Electronic Medical Records and Genomics (eMERGE) consortium [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>] has piloted the development of EHR-based phenotyping algorithms using multiple types of EHR data [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>]. This has given rise to a number of phenotyping algorithms that use both structured EHR data (eg, demographics, diagnosis and procedure codes, laboratory test results, and medications) and unstructured EHR data (eg, clinical notes, imaging reports, and discharge summaries) [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]. However, the eMERGE consortium algorithms are typically focused on identifying cases and noncases rather than characterizing a new-onset (ie, incident) disease in a population. Moreover, extracting information from unstructured clinical text is a nontrivial task that involves natural language processing techniques [<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>].</p>
      <p>In our paper, we address existing challenges for stroke ascertainment, specifically for incident stroke. Our research objective is to develop and validate a machine learning–based phenotyping algorithm to identify incident stroke and detailed stroke subtypes based on three major EHR-derived data elements: clinical concepts extracted from clinical notes; ICD, Ninth Revision (ICD-9) diagnosis codes; and CPT procedure codes.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>This study was approved by the Mayo Clinic Institutional Review Board (no. 17-008818) and is in accordance with the ethical standards mandated by the committee on responsible human experimentation. The data that support the findings of this study are available from the corresponding author upon reasonable request.</p>
      <sec>
        <title>Study Design</title>
        <p>This was a predictive modeling study that used observational cohort data for training and validation. We employed an atrial fibrillation (AF) cohort, in which all incidences of stroke were manually ascertained in a previous study [<xref ref-type="bibr" rid="ref4">4</xref>], to train and test our phenotyping algorithm for the date of incident stroke events. We then evaluated the generalizability of our algorithm in a general population cohort.</p>
      </sec>
      <sec>
        <title>The AF Cohort</title>
        <p>The AF cohort comprised a patient population from Olmsted County, Minnesota, USA [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Olmsted County is an area relatively isolated from other urban centers with only a few providers delivering most care to residents, primarily Mayo Clinic and Olmsted Medical Center [<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref45">45</xref>]. All health care–related events were extracted through the Rochester Epidemiology Project (REP) [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>], a records linkage system that allows retrieval of nearly all health care utilization and outcomes of residents living in Olmsted County. The electronic indexes of the REP include demographic information, diagnostic and procedure codes, health care utilization data, outpatient drug prescriptions, results of laboratory tests, and information about smoking, height, weight, and body mass index. ICD-9 codes and the Mayo Clinic electrocardiograms were obtained among adults aged ≥18 years from 2000 to 2014 to ascertain AF. Patients were identified by the presence of an ICD-9 code for stroke through March 31, 2015, and then validated by manual review of the EHR. Strokes were classified as ischemic strokes/transient ischemic attack or hemorrhagic strokes [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. The first (incident) event of each type of stroke after the incident AF date was ascertained, regardless of whether a patient had a prior stroke. The AF cohort included 4914 validated patients with AF, 1773 of whom were screened for a possible stroke. <xref ref-type="table" rid="table1">Table 1</xref> shows the cohort characteristics. Manual abstraction of the EHR validated the stroke code in 740 patients. Manual ascertainment of stroke and the dates of the events were used as a gold standard to train and test the stroke algorithm.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Atrial fibrillation cohort characteristics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Measure</td>
                <td>Cohort (n=4914)</td>
                <td>Screened (n=1773)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Gender, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>2309 (46.99)</td>
                <td>869 (49.01)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>2605 (53.01)</td>
                <td>904 (50.99)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Age at diagnosis of AF<sup>a</sup> (years), mean</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>76</td>
                <td>80</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>70</td>
                <td>74</td>
              </tr>
              <tr valign="top">
                <td colspan="2">ICD-9<sup>b</sup> diagnosis codes<sup>c</sup>, n</td>
                <td>27,243</td>
                <td>27,243</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>AF: atrial fibrillation.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>ICD-9: International Classification of Diseases, Ninth Revision.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>ICD retrieval was from AF incidence date to March 31, 2015. AF validations were from 2000 to 2014.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Candidate Predictive Features</title>
        <p>The proposed algorithm aimed to identify first (incident) stroke events within a certain time frame. The three major data elements we used were clinical concepts, ICD-9 codes, and CPT codes. To align with the manual review process, only codes and clinical notes from the AF incident date to March 31, 2015, were retrieved and processed. In our analyses, we constructed different models by varying the inclusion of CPT codes and symptom-related clinical concepts in the model feature set and compared different models’ performances.</p>
        <p>Both ICD-9 and CPT codes were extracted from the REP database. Clinical concepts were identified from the major and secondary problem list section of Mayo Clinic EHR, and from clinical notes from other REP sites using a natural language processing system, MedTagger [<xref ref-type="bibr" rid="ref47">47</xref>]. Expert-provided vocabulary was adopted from a previous study [<xref ref-type="bibr" rid="ref48">48</xref>] to extract clinical concepts from unstructured clinical notes. MedTagger enables a series of natural language processing processes, including regular expression matching and positive, negative, or probable identification with ConText [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>], and is insensitive to upper and lower case. MedTagger is also able to determine if the extracted clinical concepts are referring to the patients or their family members, or if the extracted clinical concepts are in present tense and thus are referring to a current event rather than a past medical condition. We considered only documents with positive, present-tense stroke mentions that were referring to patients themselves. Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> lists clinical concepts for 2 major stroke subtypes and stroke-related symptoms. Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> lists ICD-9 codes for 2 stroke subtypes and stroke-related symptoms. Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> lists the CPT codes used in the stroke algorithm.</p>
        <p>Clinical concept dates were determined by the date of the clinical notes from which clinical concepts were extracted, while ICD-9 and CPT code dates were extracted from the REP. Each visit was characterized by clinical concepts, ICD-9, and CPT codes within a 60-day window. The visit date was determined by the earliest date of any of the 3 elements in the 60-day window. If visit dates were within a 60-day window of a confirmed stroke incidence date, they were considered positive instances; otherwise, they were considered negative instances. <xref rid="figure1" ref-type="fig">Figure 1</xref> demonstrates an example with an incident stroke on July 4, 2004. All visits were extracted and included in our data set if there was at least one key word or code during a 60-day window. Nurse abstractors reviewed every visit sequentially until they determined the incidence date to be July 4, 2004. All subsequent visits after a positive stroke incident were not reviewed and thus were not included in our analyses. Since the confirmed stroke incidence date fell in the date range of the third visit (June 24, 2004-August 22, 2004), we considered the combination of codes and clinical concepts in this visit to be predictive of a positive stroke incidence.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Inclusion of clinical concepts and codes on a patient visit timeline. CPT: Current Procedural Terminology; ICD-9: International Classification of Diseases, Ninth Revision.</p>
          </caption>
          <graphic xlink:href="jmir_v23i3e22951_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Analysis</title>
        <p>After incident stroke was confirmed, visits afterwards were not reviewed by abstractors and were thus excluded from our overall data set. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows the workflow of the algorithm training and testing process. We created a data set with 9130 confirmed visits (with stroke vs nonstroke labels) among the 1773 patients. In total, there were 746 stroke visits and 8384 nonstroke visits. The stroke incidence count (n=746) was larger than the number of patients with confirmed stroke incidence (n=740) because incidence dates for different subtypes of stroke (ischemic stroke/transient ischemic attack and hemorrhagic stroke) were all recorded, such that patients might have had multiple incidence dates. We included data from a randomly selected 79.98% of our screened patients (1418/1773 patients; 7253 visits) as a training set and the remaining 20.02% of our screened patients (355/1773 patients; 1877 visits) were retained as an independent testing set. Due to the outcome imbalance in the data set (positive:negative ratio of about 1:10), we used the synthetic minority oversampling technique [<xref ref-type="bibr" rid="ref51">51</xref>] to create oversampled training data sets with an oversampling percentage of 1000%.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Stroke algorithm training and testing workflow. AF: atrial fibrillation.</p>
          </caption>
          <graphic xlink:href="jmir_v23i3e22951_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>We considered two machine learning classifiers, logistic regression and random forest [<xref ref-type="bibr" rid="ref52">52</xref>], to train our phenotyping models. Logistic regression served as a baseline modeling algorithm. Random forest was also chosen because of its high performance with structured input features and better model flexibility. We also considered the influence of feature groups by varying the inclusion of CPT codes and symptom terms in the input feature set. The hyperparameter tuning of the machine learning models was performed using 10-fold cross-validation. The performance metrics adopted for the machine learning task in the test set were precision, recall, and F score. The oversampling and machine learning modeling training and testing processes were implemented in Weka 3 (University of Waikato) [<xref ref-type="bibr" rid="ref53">53</xref>]. Additional statistical summaries were performed using the R statistical software version 3.6.2 (The R Foundation for Statistical Computing). Quantitative variables are summarized as means, while nominal variables are expressed by counts and percentages.</p>
      </sec>
      <sec>
        <title>Validation Cohort</title>
        <p>We evaluated the generalizability of our model on a sample from a general population cohort of 74,314 patients. This cohort consisted of individuals sampled in Olmsted County, Minnesota on January 1, 2006, with an age ≥30 years and with no prior history of cardiovascular disease. We applied the best performing model based on the leave-out test set to this entire population cohort to generate incident stroke predictions. We then randomly selected 50 patients from those who had no stroke-related features (ie, de facto negative stroke predictions), 50 patients from those who were shown to have negative stroke predictions, and 50 patients from those who were shown to have positive stroke predictions and a predicted incident stroke for evaluation. This verification-based sampling strategy allowed for estimates of positive and negative predictive values (PPVs and NPVs, respectively) by conditioning on algorithm predictions. Under these conditions (n=50), the half-width of the 95% Wilson score CI for the PPVs and separate NPVs would be approximately 0.1 for a true value of 0.85.</p>
        <p>All 150 patient cases were reviewed by 1 nurse abstractor to confirm incident stroke, which served as our gold standard. We recorded model prediction outputs on all patient visits in the 150-patient validation set. We combined visit-level true predictions to generate patient-level incidence predictions by saving only the earliest date of positive predictions as stroke incidences. We compared patient-level incidence predictions with our gold standard. True prediction in our evaluation meant the date of the predicted incident stroke was within 60 days of the abstracted stroke date. A 2 x 2 confusion matrix was used to calculate performance scores for prediction evaluation. Model performance metrics included PPV and NPV using manual evaluation as the gold standard and patient-level predictions to calculate true positives, false positives, true negatives, and false negatives. The uncertainty of these performance estimates was calculated using Wilson score 95% CI for proportions.</p>
        <p>In addition, we developed heuristic rules to distinguish stroke subtype (ischemic stroke/transient ischemic attack or hemorrhagic stroke) of each identified stroke incidence by analyzing the composition of keyword or code input feature sets (in a window of 60 days). We counted the number of keywords or codes for each ischemic stroke/transient ischemic attack and hemorrhagic stroke. If an input feature set contained more keywords or codes for ischemic stroke/transient ischemic attack, then this incidence was considered an ischemic stroke incidence; otherwise, it was considered a hemorrhagic stroke incidence. We only evaluated correct incident stroke predictions from the previous step in the evaluation data set with manually ascertained subtypes as the gold standard. Accuracy was calculated to measure performance of the subtype identification.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Model Selection and Subtype Identification</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the algorithm performance measured on the test set for 8 models run on 4 input combinations and 2 classifiers (logistic regression and random forest). The random forest classifier outperformed the logistic classifier regardless of the feature sets used. Inclusion of CPT codes as features improved the performance of the random forest model, with the F score increasing from 0.836 (Model 3) to 0.905 (Model 1). However, in the logistic model, the inclusion of CPT codes only slightly improved the F score from 0.772 (Model 4) to 0.793 (Model 2). Compared with using all features (Models 1 and 2), excluding the symptom terms (Models 5 and 6) achieved better F scores (values italicized in <xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Stroke algorithm performance.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="100"/>
            <col width="250"/>
            <col width="90"/>
            <col width="140"/>
            <col width="110"/>
            <col width="100"/>
            <col width="110"/>
            <thead>
              <tr valign="bottom">
                <td>Model</td>
                <td>ICD-9<sup>a</sup></td>
                <td>Clinical concept</td>
                <td>CPT<sup>b</sup></td>
                <td>Classifier</td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Yes</td>
                <td>Symptoms + disease concepts</td>
                <td>Yes</td>
                <td>Random forest</td>
                <td>0.912</td>
                <td>0.906</td>
                <td>0.905</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Yes</td>
                <td>Symptoms + disease concepts</td>
                <td>Yes</td>
                <td>Logistic</td>
                <td>0.807</td>
                <td>0.795</td>
                <td>0.793</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Yes</td>
                <td>Symptoms + disease concepts</td>
                <td>No</td>
                <td>Random forest</td>
                <td>0.835</td>
                <td>0.845</td>
                <td>0.836</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Yes</td>
                <td>Symptoms + disease concepts</td>
                <td>No</td>
                <td>Logistic</td>
                <td>0.791</td>
                <td>0.777</td>
                <td>0.772</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Yes</td>
                <td>Disease-only concept</td>
                <td>Yes</td>
                <td>Random forest</td>
                <td>
                  <italic>0.920</italic>
                </td>
                <td>
                  <italic>0.915</italic>
                </td>
                <td>
                  <italic>0.915</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Yes</td>
                <td>Disease-only concept</td>
                <td>Yes</td>
                <td>Logistic</td>
                <td>0.809</td>
                <td>0.798</td>
                <td>0.796</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Yes</td>
                <td>Disease-only concept</td>
                <td>No</td>
                <td>Random forest</td>
                <td>0.856</td>
                <td>0.847</td>
                <td>0.846</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Yes</td>
                <td>Disease-only concept</td>
                <td>No</td>
                <td>Logistic</td>
                <td>0.779</td>
                <td>0.767</td>
                <td>0.763</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>ICD-9: International Classification of Diseases, Ninth Revision.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>CPT: Current Procedural Terminology.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Model Generalizability</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the distribution of stroke features in the AF cohort and the general population cohort. The AF cohort had a higher proportion of stroke-related codes and concepts. Results from the evaluation of the 150 selected patient records are presented in <xref ref-type="table" rid="table4">Table 4</xref>. Prediction performance corresponded to a PPV of 0.86 (95% CI 0.74-0.93), an NPV without ICD codes of 1.00 (95% CI 0.92-1.00), and an NPV with codes of 0.92 (95% CI 0.81-0.97). No strokes were observed among patients with no eligible stroke ICD codes. For subtype characterization, we achieved an accuracy of 80% (95% CI 0.68-0.89) in the general population sample.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Patient feature distribution post-AF.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="140"/>
            <col width="170"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td>Stroke feature distribution</td>
                <td colspan="2">AF<sup>a</sup> screened</td>
                <td colspan="2">AF nonscreened (n=3141), n (%)</td>
                <td colspan="2">AF cohort total (n=4914), n (%)</td>
                <td colspan="2">Olmsted County cohort (N=71,429), n (%)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Stroke (n=740),<break/> n (%)</td>
                <td>No stroke (n=1033),<break/> n (%)</td>
                <td colspan="6">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ICD-9<sup>b</sup>+CPT<sup>c</sup>+CC<sup>d</sup></td>
                <td>654 (88.37)</td>
                <td>379 (36.69)</td>
                <td colspan="2">0 (0)</td>
                <td colspan="2">1033 (21.02)</td>
                <td colspan="2">2726 (3.82)</td>
              </tr>
              <tr valign="top">
                <td>ICD-9+CPT</td>
                <td>66 (8.92)</td>
                <td>596 (57.70)</td>
                <td colspan="2">0 (0)</td>
                <td colspan="2">662 (13.47)</td>
                <td colspan="2">1018 (1.42)</td>
              </tr>
              <tr valign="top">
                <td>ICD-9+CC</td>
                <td>9 (1.22)</td>
                <td>12 (1.16)</td>
                <td colspan="2">0 (0)</td>
                <td colspan="2">21 (0.43)</td>
                <td colspan="2">48 (0.067)</td>
              </tr>
              <tr valign="top">
                <td>CPT+CC</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2">167 (5.32)</td>
                <td colspan="2">167 (3.40)</td>
                <td colspan="2">1595 (2.23)</td>
              </tr>
              <tr valign="top">
                <td>ICD-9</td>
                <td>11 (1.49)</td>
                <td>46 (4.45)</td>
                <td colspan="2">0 (0)</td>
                <td colspan="2">57 (1.16)</td>
                <td colspan="2">194 (0.27)</td>
              </tr>
              <tr valign="top">
                <td>CPT</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2">1736 (55.27)</td>
                <td colspan="2">1736 (35.33)</td>
                <td colspan="2">17,433 (24.40)</td>
              </tr>
              <tr valign="top">
                <td>CC</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2">11 (0.35)</td>
                <td colspan="2">11 (0.22)</td>
                <td colspan="2">566 (0.79)</td>
              </tr>
              <tr valign="top">
                <td>None</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2">1227 (39.06)</td>
                <td colspan="2">1227 (24.97)</td>
                <td colspan="2">47,849 (66.99)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>AF: atrial fibrillation.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>ICD-9: International Classification of Diseases, Ninth Revision.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>CPT: Current Procedural Terminology.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>CC: clinical concepts.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Generalizability analysis results from the Olmsted County cohort.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="230"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Gold standard</td>
                <td colspan="3">Stroke algorithm prediction (N=150)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Negative (n=100)</td>
                <td>Positive (n=50)</td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>No ICD-9<sup>a</sup> codes (n=50)</td>
                <td>Predicted no stroke (n=50)</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Stroke</td>
                <td>0</td>
                <td>4</td>
                <td>43</td>
              </tr>
              <tr valign="top">
                <td>No Stroke</td>
                <td>50</td>
                <td>46</td>
                <td>7</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>ICD-9: International Classification of Diseases, Ninth Revision.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The rapid expansion of information available in EHRs opens new opportunities to combine structured and unstructured data for research. Advances in machine learning methods and tools facilitate the combination of multimodal clinical data for effective development of phenotyping algorithms. However, performance of stroke electronic phenotyping algorithms varies by stroke subtypes [<xref ref-type="bibr" rid="ref25">25</xref>] and phenotyping tasks (ie, case vs noncase or incident stroke phenotyping). Our previous study showed that when naïve ICD codes with clinical concept matching were used, stroke incidence identification had a PPV of 60.6% while case-versus-noncase identification had a much higher PPV of 88.7% [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <p>In this study, we included clinical concepts extracted from clinical notes along with ICD-9 and CPT codes for incident stroke ascertainment. The rationale to add CPT codes is that diagnosis of stroke usually needs to be confirmed by imaging evidence and will probably be followed by therapeutic procedures. Thus, the addition of CPT codes in the model could potentially help to reduce the information redundancy effect by distinguishing between past and current events recorded in clinical notes. Our algorithm closely resembles the ascertainment process (chart review) of clinicians, which uses multiple types of EHR data (eg, diagnoses and procedure codes, unstructured clinical notes) in a parsimonious manner. Due to the redundancy and temporal ambiguity in unstructured clinical notes, we needed to construct a data set with sufficient and interpretable features from multimodal clinical data.</p>
        <p>We found that the random forest generated better results, while the addition of CPT codes improved overall performance. This may be because imaging procedures, especially head CT or MRI, are critical in the diagnosis of stroke. Therefore, CPT codes of such procedures can be important indicators for distinguishing between incident and historical events. In addition, ICD codes and therapeutic procedures can vary significantly between incident and recurrent events. Meanwhile, we observed that the addition of stroke-related symptom concepts was not helpful for the phenotyping task. This may be due to the ubiquitous nature of many stroke-related symptoms: they may be stroke-related but not necessarily stroke specific. Additionally, our stroke incidence ascertainment depends largely on well-documented evidence, such as imaging or imaging reports. Without properly recorded evidence, patients are not likely to be ascertained as having had a stroke.</p>
        <p>Our generalizability evaluation demonstrates that models trained using a specific disease cohort for incident stroke ascertainment can generalize well to a general patient population. This is very encouraging given there are many existing patient cohorts available. Secondary use of these patient cohorts would be a cost-effective way for developing machine learning–based phenotyping algorithms. The study also illustrates that incorporating structured EHR data, such as CPT codes, can effectively distinguish incident stroke mentions from historical events in the clinical notes.</p>
        <p>One limitation of our study is the dependence on domain experts to provide relevant clinical concepts, ICD-9 codes, and CPT codes. In the future, we will explore advanced feature engineering approaches to identify those relevant concepts or codes automatically or semiautomatically. We are also aware that our imbalanced cohort data and oversampling strategies might have introduced overfitting. Although our evaluation in the general population proved the performance of the algorithm, in the future, we can adopt a case–control matching strategy to deal with imbalanced data and mitigate the potential overfitting issue. In addition, new treatment strategies (mechanical thrombectomy) to treat stroke have been on the market in recent years, and thus the features used in our algorithm could have different weights for predictions of events in different temporal settings. A more precise strategy could consider using different features for prediction tasks in different time frames, where variations in clinical knowledge and care path have been considered.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In conclusion, the high prevalence of stroke and the lack of an efficient algorithm to confirm incident stroke events necessitate the development of an effective and interpretable algorithm to identify incident stroke occurrences. In this paper, we described our efforts to develop and validate an EHR-based algorithm that accurately identifies incident stroke events and goes beyond typical case-versus-noncase stroke identification. Our algorithm’s good performance in a general population sample demonstrates its generalizability and potential to be adopted by other institutions.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>ICD-9, CPT codes, and clinical concepts for two major stroke subtypes and stroke-related symptoms.</p>
        <media xlink:href="jmir_v23i3e22951_app1.docx" xlink:title="DOCX File , 34 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AF</term>
          <def>
            <p>atrial fibrillation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CPT</term>
          <def>
            <p>Current Procedural Terminology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CT</term>
          <def>
            <p>computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">eMERGE</term>
          <def>
            <p>Electronic Medical Records and Genomics</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ICD-9</term>
          <def>
            <p>International Classification of Diseases, Ninth Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MRI</term>
          <def>
            <p>magnetic resonance imaging</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NPV</term>
          <def>
            <p>negative predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">REP</term>
          <def>
            <p>Rochester Epidemiology Project</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank the other investigators, the staff, and the participants of the stroke phenotyping study for their valuable contributions. The stroke phenotyping study was conducted by a collaborative team of researchers from the Department of Health Sciences Research at Mayo Clinic and used 2 cohorts: the AF cohort and the Olmsted County cohort. This study was supported by grants from the National Institutes of Health (no. R01 HL136659 and R21 AG062580) and the American Heart Association (no. 11SDG7260039), and was made possible using the resources of the Rochester Epidemiology Project (no. R01 AG034676).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>YZ had full access to all study data and takes responsibility for the integrity of the data and the accuracy of the data analysis. All authors were responsible for study concept; design, acquisition, analysis, or interpretation of data; critical revision of the manuscript for important intellectual content; and administrative, technical, or material support. YZ was responsible for the drafting of the manuscript. YZ and NBL were responsible for statistical analysis. SB and AMC were responsible for obtaining funding. HL, SB, and NBL were responsible for study supervision.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bonita</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of stroke</article-title>
          <source>Lancet</source>
          <year>1992</year>
          <month>02</month>
          <day>08</day>
          <volume>339</volume>
          <issue>8789</issue>
          <fpage>342</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1016/0140-6736(92)91658-u</pub-id>
          <pub-id pub-id-type="medline">1346420</pub-id>
          <pub-id pub-id-type="pii">0140-6736(92)91658-U</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Clinical diagnosis of stroke subtypes</article-title>
          <source>UpToDate</source>
          <access-date>2020-07-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.uptodate.com/contents/clinical-diagnosis-of-stroke-subtypes">https://www.uptodate.com/contents/clinical-diagnosis-of-stroke-subtypes</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morley</surname>
              <given-names>KI</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Denaxas</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Hunter</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Perel</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Timmis</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Schilling</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hemingway</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Defining disease phenotypes using national linked electronic health records: a case study of atrial fibrillation</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>11</issue>
          <fpage>e110900</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0110900"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0110900</pub-id>
          <pub-id pub-id-type="medline">25369203</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-25932</pub-id>
          <pub-id pub-id-type="pmcid">PMC4219705</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chamberlain</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Alonso</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gersh</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Killian</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Roger</surname>
              <given-names>VL</given-names>
            </name>
          </person-group>
          <article-title>No decline in the risk of stroke following incident atrial fibrillation since 2000 in the community: a concerning trend</article-title>
          <source>J Am Heart Assoc</source>
          <year>2016</year>
          <month>06</month>
          <day>13</day>
          <volume>5</volume>
          <issue>6</issue>
          <fpage>e003408</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ahajournals.org/doi/10.1161/JAHA.116.003408?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/JAHA.116.003408</pub-id>
          <pub-id pub-id-type="medline">27412902</pub-id>
          <pub-id pub-id-type="pii">JAHA.116.003408</pub-id>
          <pub-id pub-id-type="pmcid">PMC4937280</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scannapieco</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Bush</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Paju</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Associations between periodontal disease and risk for atherosclerosis, cardiovascular disease, and stroke. A systematic review</article-title>
          <source>Ann Periodontol</source>
          <year>2003</year>
          <month>12</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>38</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1902/annals.2003.8.1.38</pub-id>
          <pub-id pub-id-type="medline">14971247</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Slee</surname>
              <given-names>VN</given-names>
            </name>
          </person-group>
          <article-title>The International Classification of Diseases: ninth revision (ICD-9)</article-title>
          <source>Ann Intern Med</source>
          <year>1978</year>
          <month>03</month>
          <volume>88</volume>
          <issue>3</issue>
          <fpage>424</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-88-3-424</pub-id>
          <pub-id pub-id-type="medline">629506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imran</surname>
              <given-names>Tasnim F</given-names>
            </name>
            <name name-style="western">
              <surname>Posner</surname>
              <given-names>Daniel</given-names>
            </name>
            <name name-style="western">
              <surname>Honerlaw</surname>
              <given-names>Jacqueline</given-names>
            </name>
            <name name-style="western">
              <surname>Vassy</surname>
              <given-names>Jason L</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Rebecca J</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>Yuk-Lam</given-names>
            </name>
            <name name-style="western">
              <surname>Kittner</surname>
              <given-names>Steven J</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>Katherine P</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Tianxi</given-names>
            </name>
            <name name-style="western">
              <surname>O'Donnell</surname>
              <given-names>Christopher J</given-names>
            </name>
            <name name-style="western">
              <surname>Djousse</surname>
              <given-names>Luc</given-names>
            </name>
            <name name-style="western">
              <surname>Gagnon</surname>
              <given-names>David R</given-names>
            </name>
            <name name-style="western">
              <surname>Gaziano</surname>
              <given-names>J Michael</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>Peter Wf</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>Kelly</given-names>
            </name>
          </person-group>
          <article-title>A phenotyping algorithm to identify acute ischemic stroke accurately from a national biobank: the Million Veteran Program</article-title>
          <source>Clin Epidemiol</source>
          <year>2018</year>
          <volume>10</volume>
          <fpage>1509</fpage>
          <lpage>1521</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.2147/CLEP.S160764"/>
          </comment>
          <pub-id pub-id-type="doi">10.2147/CLEP.S160764</pub-id>
          <pub-id pub-id-type="medline">30425582</pub-id>
          <pub-id pub-id-type="pii">clep-10-1509</pub-id>
          <pub-id pub-id-type="pmcid">PMC6201999</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coull</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bull</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Giles</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rothwell</surname>
              <given-names>P</given-names>
            </name>
            <collab>Oxford Vascular (OXVASC) Study</collab>
          </person-group>
          <article-title>Direct assessment of completeness of ascertainment in a stroke incidence study</article-title>
          <source>Stroke</source>
          <year>2004</year>
          <month>09</month>
          <volume>35</volume>
          <issue>9</issue>
          <fpage>2041</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1161/01.STR.0000137605.48864.2f</pub-id>
          <pub-id pub-id-type="medline">15256682</pub-id>
          <pub-id pub-id-type="pii">01.STR.0000137605.48864.2f</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thangaraj</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kummer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lorberbaum</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Elkind</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tatonetti</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Comparative analysis, applications, and interpretation of electronic health record-based stroke phenotyping methods</article-title>
          <source>BioData Min</source>
          <year>2020</year>
          <month>12</month>
          <day>07</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>21</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://biodatamining.biomedcentral.com/articles/10.1186/s13040-020-00230-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13040-020-00230-x</pub-id>
          <pub-id pub-id-type="medline">33372632</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13040-020-00230-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC7720570</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>KGM</given-names>
            </name>
            <name name-style="western">
              <surname>Bots</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Salonen</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Elwood</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Freire de Concalves</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nikitin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sivenius</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Inzitari</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Benetou</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Tuomilehto</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Koudstaal</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Grobbee</surname>
              <given-names>DE</given-names>
            </name>
          </person-group>
          <article-title>Prediction of stroke in the general population in Europe (EUROSTROKE): Is there a role for fibrinogen and electrocardiography?</article-title>
          <source>J Epidemiol Community Health</source>
          <year>2002</year>
          <month>03</month>
          <volume>56 Suppl 1</volume>
          <fpage>i30</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jech.bmj.com/cgi/pmidlookup?view=long&#38;pmid=11815642"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jech.56.suppl_1.i30</pub-id>
          <pub-id pub-id-type="medline">11815642</pub-id>
          <pub-id pub-id-type="pmcid">PMC1765507</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>King Chung</given-names>
            </name>
            <name name-style="western">
              <surname>Speier</surname>
              <given-names>William</given-names>
            </name>
            <name name-style="western">
              <surname>El-Saden</surname>
              <given-names>Suzie</given-names>
            </name>
            <name name-style="western">
              <surname>Liebeskind</surname>
              <given-names>David S</given-names>
            </name>
            <name name-style="western">
              <surname>Saver</surname>
              <given-names>Jeffrey L</given-names>
            </name>
            <name name-style="western">
              <surname>Bui</surname>
              <given-names>Alex A T</given-names>
            </name>
            <name name-style="western">
              <surname>Arnold</surname>
              <given-names>Corey W</given-names>
            </name>
          </person-group>
          <article-title>Predicting discharge mortality after acute ischemic stroke using balanced data</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2014</year>
          <volume>2014</volume>
          <fpage>1787</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25954451"/>
          </comment>
          <pub-id pub-id-type="medline">25954451</pub-id>
          <pub-id pub-id-type="pmcid">PMC4419881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chuang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Random forest can predict 30-day mortality of spontaneous intracerebral hemorrhage with remarkable discrimination</article-title>
          <source>Eur J Neurol</source>
          <year>2010</year>
          <month>07</month>
          <volume>17</volume>
          <issue>7</issue>
          <fpage>945</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1468-1331.2010.02955.x</pub-id>
          <pub-id pub-id-type="medline">20136650</pub-id>
          <pub-id pub-id-type="pii">ENE2955</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Colak</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Karaman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Turtay</surname>
              <given-names>MG</given-names>
            </name>
          </person-group>
          <article-title>Application of knowledge discovery process on the prediction of stroke</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2015</year>
          <month>05</month>
          <volume>119</volume>
          <issue>3</issue>
          <fpage>181</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2015.03.002</pub-id>
          <pub-id pub-id-type="medline">25827533</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(15)00056-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Asadi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Machine learning for outcome prediction of acute ischemic stroke post intra-arterial therapy</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>e88225</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0088225"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0088225</pub-id>
          <pub-id pub-id-type="medline">24520356</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-41819</pub-id>
          <pub-id pub-id-type="pmcid">PMC3919736</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Crawford</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Ritchie</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Bielinski</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Basford</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Bradford</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chai</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Bastarache</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zuvich</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Peissig</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Carrell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ramirez</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wilke</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Pacheco</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Kho</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Hayes</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Matsumoto</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kopp</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Newton</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Jarvik</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manolio</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Chisholm</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>McCarty</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Masys</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Roden</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>de Andrade</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Variants near FOXE1 are associated with hypothyroidism and other thyroid conditions: using electronic medical records for genome- and phenome-wide studies</article-title>
          <source>Am J Hum Genet</source>
          <year>2011</year>
          <month>10</month>
          <day>07</day>
          <volume>89</volume>
          <issue>4</issue>
          <fpage>529</fpage>
          <lpage>42</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0002-9297(11)00398-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ajhg.2011.09.008</pub-id>
          <pub-id pub-id-type="medline">21981779</pub-id>
          <pub-id pub-id-type="pii">S0002-9297(11)00398-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC3188836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Carrell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kho</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Linneman</surname>
              <given-names>James G</given-names>
            </name>
            <name name-style="western">
              <surname>Pacheco</surname>
              <given-names>Jennifer A</given-names>
            </name>
            <name name-style="western">
              <surname>Peissig</surname>
              <given-names>Peggy</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>Luke</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>Noah</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>Christopher G</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>Jyotishman</given-names>
            </name>
          </person-group>
          <article-title>Analyzing the heterogeneity and complexity of Electronic Health Record oriented phenotyping algorithms</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>274</fpage>
          <lpage>83</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22195079"/>
          </comment>
          <pub-id pub-id-type="medline">22195079</pub-id>
          <pub-id pub-id-type="pmcid">PMC3243189</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Overby</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gottesman</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Haerian</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Perotte</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bruce</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Talwalkar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bottinger</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A collaborative approach to developing an electronic health record phenotyping algorithm for drug-induced liver injury</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <month>12</month>
          <volume>20</volume>
          <issue>e2</issue>
          <fpage>e243</fpage>
          <lpage>52</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23837993"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001930</pub-id>
          <pub-id pub-id-type="medline">23837993</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001930</pub-id>
          <pub-id pub-id-type="pmcid">PMC3861914</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castro</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Minnier</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffnagle</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Block</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weill</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Nadal-Vicens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pollastri</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenquist</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ongur</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sklar</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Perlis</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Smoller</surname>
              <given-names>JW</given-names>
            </name>
            <collab>International Cohort Collection for Bipolar Disorder Consortium</collab>
          </person-group>
          <article-title>Validation of electronic health record phenotyping of bipolar disorder cases and controls</article-title>
          <source>Am J Psychiatry</source>
          <year>2015</year>
          <month>04</month>
          <volume>172</volume>
          <issue>4</issue>
          <fpage>363</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25827034"/>
          </comment>
          <pub-id pub-id-type="doi">10.1176/appi.ajp.2014.14030423</pub-id>
          <pub-id pub-id-type="medline">25827034</pub-id>
          <pub-id pub-id-type="pmcid">PMC4441333</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feigin</surname>
              <given-names>Valery L</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>Kristie</given-names>
            </name>
          </person-group>
          <article-title>Editorial comment—Stroke incidence studies one step closer to the elusive gold standard?</article-title>
          <source>Stroke</source>
          <year>2004</year>
          <month>09</month>
          <volume>35</volume>
          <issue>9</issue>
          <fpage>2045</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="medline">15331801</pub-id>
          <pub-id pub-id-type="pii">35/9/2045</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hollands</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marteau</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Fletcher</surname>
              <given-names>PC</given-names>
            </name>
          </person-group>
          <article-title>Non-conscious processes in changing health-related behaviour: a conceptual analysis and framework</article-title>
          <source>Health Psychol Rev</source>
          <year>2016</year>
          <month>02</month>
          <day>16</day>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>381</fpage>
          <lpage>394</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1080/17437199.2015.1138093"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/17437199.2015.1138093</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Alwell</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moomaw</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Adeoye</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Flaherty</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Ferioli</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>De Los Rios La Rosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Martini</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khatri</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kleindorfer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kissela</surname>
              <given-names>BM</given-names>
            </name>
          </person-group>
          <article-title>Towards phenotyping stroke: leveraging data from a large-scale epidemiological study to detect stroke diagnosis</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>e0192586</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0192586"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0192586</pub-id>
          <pub-id pub-id-type="medline">29444182</pub-id>
          <pub-id pub-id-type="pii">PONE-D-17-01062</pub-id>
          <pub-id pub-id-type="pmcid">PMC5812624</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Howard</surname>
              <given-names>VJ</given-names>
            </name>
            <name name-style="western">
              <surname>McClure</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Meschia</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Pulley</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Orr</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Friday</surname>
              <given-names>GH</given-names>
            </name>
          </person-group>
          <article-title>High prevalence of stroke symptoms among persons without a diagnosis of stroke or transient ischemic attack in a general population: the REasons for Geographic And Racial Differences in Stroke (REGARDS) study</article-title>
          <source>Arch Intern Med</source>
          <year>2006</year>
          <month>10</month>
          <day>09</day>
          <volume>166</volume>
          <issue>18</issue>
          <fpage>1952</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1001/archinte.166.18.1952</pub-id>
          <pub-id pub-id-type="medline">17030827</pub-id>
          <pub-id pub-id-type="pii">166/18/1952</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <article-title>How a CPT code becomes a code</article-title>
          <source>American Speech-Language-Hearing Association</source>
          <access-date>2020-07-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.asha.org/uploadedFiles/How-A-CPT-Code-Becomes-A-Code.pdf">https://www.asha.org/uploadedFiles/How-A-CPT-Code-Becomes-A-Code.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Denaxas</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Herrett</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Kalra</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hingorani</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Kivimaki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Timmis</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Smeeth</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hemingway</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Data resource profile: cardiovascular disease research using linked bespoke studies and electronic health records (CALIBER)</article-title>
          <source>Int J Epidemiol</source>
          <year>2012</year>
          <month>12</month>
          <volume>41</volume>
          <issue>6</issue>
          <fpage>1625</fpage>
          <lpage>38</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23220717"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ije/dys188</pub-id>
          <pub-id pub-id-type="medline">23220717</pub-id>
          <pub-id pub-id-type="pii">dys188</pub-id>
          <pub-id pub-id-type="pmcid">PMC3535749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Woodfield</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>I</given-names>
            </name>
            <collab>UK Biobank Stroke Outcomes Group</collab>
            <collab>UK Biobank Follow-Up and Outcomes Working Group</collab>
            <name name-style="western">
              <surname>Sudlow</surname>
              <given-names>CLM</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of electronic health record data for identifying stroke cases in large-scale epidemiological studies: a systematic review from the UK Biobank Stroke Outcomes Group</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>10</issue>
          <fpage>e0140533</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0140533"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0140533</pub-id>
          <pub-id pub-id-type="medline">26496350</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-01831</pub-id>
          <pub-id pub-id-type="pmcid">PMC4619732</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>'t Jong</surname>
              <given-names>GW</given-names>
            </name>
            <name name-style="western">
              <surname>van Soest</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Sturkenboom</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Kors</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Automating classification of free-text electronic health records for epidemiological studies</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2012</year>
          <month>06</month>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>651</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1002/pds.3205</pub-id>
          <pub-id pub-id-type="medline">22271492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schellenbaum</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Heckbert</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Rea</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Lumley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kitzman</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Roger</surname>
              <given-names>VL</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Psaty</surname>
              <given-names>BM</given-names>
            </name>
          </person-group>
          <article-title>Congestive heart failure incidence and prognosis: case identification using central adjudication versus hospital discharge diagnoses</article-title>
          <source>Ann Epidemiol</source>
          <year>2006</year>
          <month>03</month>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>115</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1016/j.annepidem.2005.02.012</pub-id>
          <pub-id pub-id-type="medline">15964203</pub-id>
          <pub-id pub-id-type="pii">S1047-2797(05)00085-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pakhomov</surname>
              <given-names>Serguei</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>Susan A</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>Steven J</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>Christopher G</given-names>
            </name>
            <name name-style="western">
              <surname>Meverden</surname>
              <given-names>Ryan</given-names>
            </name>
            <name name-style="western">
              <surname>Roger</surname>
              <given-names>Véronique L</given-names>
            </name>
          </person-group>
          <article-title>Electronic medical records for clinical research: application to the identification of heart failure</article-title>
          <source>Am J Manag Care</source>
          <year>2007</year>
          <month>06</month>
          <volume>13</volume>
          <issue>6 Part 1</issue>
          <fpage>281</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ajmc.com/pubMed.php?pii=3319"/>
          </comment>
          <pub-id pub-id-type="medline">17567225</pub-id>
          <pub-id pub-id-type="pii">3319</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ermenc</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Minimizing mistakes in clinical diagnosis</article-title>
          <source>J Forensic Sci</source>
          <year>1999</year>
          <month>07</month>
          <volume>44</volume>
          <issue>4</issue>
          <fpage>810</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="medline">10432615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCarty</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Chisholm</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jarvik</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Masys</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Ritchie</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Roden</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Struewing</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>WA</given-names>
            </name>
            <collab>eMERGE Team</collab>
          </person-group>
          <article-title>The eMERGE Network: a consortium of biorepositories linked to electronic medical records data for conducting genomic studies</article-title>
          <source>BMC Med Genomics</source>
          <year>2011</year>
          <month>01</month>
          <day>26</day>
          <volume>4</volume>
          <fpage>13</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedgenomics.biomedcentral.com/articles/10.1186/1755-8794-4-13"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1755-8794-4-13</pub-id>
          <pub-id pub-id-type="medline">21269473</pub-id>
          <pub-id pub-id-type="pii">1755-8794-4-13</pub-id>
          <pub-id pub-id-type="pmcid">PMC3038887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gottesman</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Kuivaniemi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tromp</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Faucett</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manolio</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Sanderson</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Kannry</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zinberg</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Basford</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Brilliant</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chisholm</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Connolly</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Crosslin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Gallego</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Haines</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Hakonarson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Harley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jarvik</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>McCarty</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ritchie</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Roden</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Böttinger</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>MS</given-names>
            </name>
            <collab>eMERGE Network</collab>
          </person-group>
          <article-title>The Electronic Medical Records and Genomics (eMERGE) Network: past, present, and future</article-title>
          <source>Genet Med</source>
          <year>2013</year>
          <month>10</month>
          <volume>15</volume>
          <issue>10</issue>
          <fpage>761</fpage>
          <lpage>71</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23743551"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/gim.2013.72</pub-id>
          <pub-id pub-id-type="medline">23743551</pub-id>
          <pub-id pub-id-type="pii">gim201372</pub-id>
          <pub-id pub-id-type="pmcid">PMC3795928</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Chapter 13: Mining electronic health records in the genomics era</article-title>
          <source>PLoS Comput Biol</source>
          <year>2012</year>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e1002823</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pcbi.1002823"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002823</pub-id>
          <pub-id pub-id-type="medline">23300414</pub-id>
          <pub-id pub-id-type="pii">PCOMPBIOL-D-12-01458</pub-id>
          <pub-id pub-id-type="pmcid">PMC3531280</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kho</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Hayes</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen-Torvik</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Pacheco</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Peissig</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bielinski</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Leibson</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Jarvik</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Crosslin</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Carlson</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Newton</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Chisholm</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Lowe</surname>
              <given-names>WL</given-names>
            </name>
          </person-group>
          <article-title>Use of diverse electronic medical record systems to identify genetic risk for type 2 diabetes within a genome-wide association study</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>212</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22101970"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000439</pub-id>
          <pub-id pub-id-type="medline">22101970</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000439</pub-id>
          <pub-id pub-id-type="pmcid">PMC3277617</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ritchie</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Zuvich</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Crawford</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Schildcrout</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Bastarache</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ramirez</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Mosley</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Pulley</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Basford</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Bradford</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>LV</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>McCarty</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Chisholm</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Kho</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Carlson</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Jarvik</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Sotoodehnia</surname>
              <given-names>N</given-names>
            </name>
            <collab>Cohorts for Heart and Aging Research in Genomic Epidemiology (CHARGE) QRS Group</collab>
            <name name-style="western">
              <surname>Manolio</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Masys</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Haines</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Roden</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Genome- and phenome-wide analyses of cardiac conduction identifies markers of arrhythmia risk</article-title>
          <source>Circulation</source>
          <year>2013</year>
          <month>04</month>
          <day>02</day>
          <volume>127</volume>
          <issue>13</issue>
          <fpage>1377</fpage>
          <lpage>85</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23463857"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/CIRCULATIONAHA.112.000604</pub-id>
          <pub-id pub-id-type="medline">23463857</pub-id>
          <pub-id pub-id-type="pii">CIRCULATIONAHA.112.000604</pub-id>
          <pub-id pub-id-type="pmcid">PMC3713791</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Feblowitz</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Maloney</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Wilcox</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Ramelson</surname>
              <given-names>HZ</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>LI</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
          </person-group>
          <article-title>A method and knowledge base for automated inference of patient problems from structured data in an electronic medical record</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>6</issue>
          <fpage>859</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21613643"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000121</pub-id>
          <pub-id pub-id-type="medline">21613643</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000121</pub-id>
          <pub-id pub-id-type="pmcid">PMC3197992</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garvin</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>DuVall</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Bolton</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Heavirland</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pickard</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Heidenreich</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Samore</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>MK</given-names>
            </name>
          </person-group>
          <article-title>Automated extraction of ejection fraction for quality measurement using regular expressions in Unstructured Information Management Architecture (UIMA) for heart failure</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>859</fpage>
          <lpage>66</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22437073"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000535</pub-id>
          <pub-id pub-id-type="medline">22437073</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000535</pub-id>
          <pub-id pub-id-type="pmcid">PMC3422820</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Percha</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Nassif</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lipson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Burnside</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Automatic classification of mammography reports by BI-RADS breast tissue composition class</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>913</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22291166"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000607</pub-id>
          <pub-id pub-id-type="medline">22291166</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000607</pub-id>
          <pub-id pub-id-type="pmcid">PMC3422822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenbloom</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Mani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A study of machine-learning-based approaches to extract clinical entities and their assertions from discharge summaries</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>601</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21508414"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000163</pub-id>
          <pub-id pub-id-type="medline">21508414</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000163</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>EI</given-names>
            </name>
          </person-group>
          <article-title>Feature engineering combined with machine learning and rule-based methods for structured information extraction from narrative clinical discharge summaries</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>824</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22586067"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000776</pub-id>
          <pub-id pub-id-type="medline">22586067</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000776</pub-id>
          <pub-id pub-id-type="pmcid">PMC3422834</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>D'Avolio</surname>
              <given-names>LW</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fiore</surname>
              <given-names>LD</given-names>
            </name>
          </person-group>
          <article-title>Automated concept-level information extraction to reduce the need for custom software and rules development</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>607</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21697292"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000183</pub-id>
          <pub-id pub-id-type="medline">21697292</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000183</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168318</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kahn</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Clinical research informatics: a conceptual perspective</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <month>06</month>
          <volume>19</volume>
          <issue>e1</issue>
          <fpage>e36</fpage>
          <lpage>42</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22523344"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2012-000968</pub-id>
          <pub-id pub-id-type="medline">22523344</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2012-000968</pub-id>
          <pub-id pub-id-type="pmcid">PMC3392857</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chamberlain</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Gersh</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Alonso</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>LY</given-names>
            </name>
            <name name-style="western">
              <surname>Berardi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Manemann</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Killian</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Roger</surname>
              <given-names>VL</given-names>
            </name>
          </person-group>
          <article-title>Decade-long trends in atrial fibrillation incidence and survival: a community study</article-title>
          <source>Am J Med</source>
          <year>2015</year>
          <month>03</month>
          <volume>128</volume>
          <issue>3</issue>
          <fpage>260</fpage>
          <lpage>7.e1</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25446299"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.amjmed.2014.10.030</pub-id>
          <pub-id pub-id-type="medline">25446299</pub-id>
          <pub-id pub-id-type="pii">S0002-9343(14)00971-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4340721</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>St Sauver</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Grossardt</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Yawn</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rocca</surname>
              <given-names>WA</given-names>
            </name>
          </person-group>
          <article-title>Use of a medical records linkage system to enumerate a dynamic population over time: the Rochester epidemiology project</article-title>
          <source>Am J Epidemiol</source>
          <year>2011</year>
          <month>05</month>
          <day>01</day>
          <volume>173</volume>
          <issue>9</issue>
          <fpage>1059</fpage>
          <lpage>68</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21430193"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/aje/kwq482</pub-id>
          <pub-id pub-id-type="medline">21430193</pub-id>
          <pub-id pub-id-type="pii">kwq482</pub-id>
          <pub-id pub-id-type="pmcid">PMC3105274</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rocca</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Yawn</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>St Sauver</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Grossardt</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>History of the Rochester Epidemiology Project: half a century of medical records linkage in a US population</article-title>
          <source>Mayo Clin Proc</source>
          <year>2012</year>
          <month>12</month>
          <volume>87</volume>
          <issue>12</issue>
          <fpage>1202</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23199802"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.mayocp.2012.08.012</pub-id>
          <pub-id pub-id-type="medline">23199802</pub-id>
          <pub-id pub-id-type="pii">S0025-6196(12)00895-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC3541925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rocca</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Grossardt</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Brue</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Bock-Goodner</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Chamberlain</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Finney Rutten</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>St Sauver</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Data Resource Profile: Expansion of the Rochester Epidemiology Project medical records-linkage system (E-REP)</article-title>
          <source>Int J Epidemiol</source>
          <year>2018</year>
          <month>04</month>
          <day>01</day>
          <volume>47</volume>
          <issue>2</issue>
          <fpage>368</fpage>
          <lpage>368j</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29346555"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ije/dyx268</pub-id>
          <pub-id pub-id-type="medline">29346555</pub-id>
          <pub-id pub-id-type="pii">4812001</pub-id>
          <pub-id pub-id-type="pmcid">PMC5913632</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Witt</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Yawn</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Roger</surname>
              <given-names>VL</given-names>
            </name>
          </person-group>
          <article-title>A community-based study of stroke incidence after myocardial infarction</article-title>
          <source>Ann Intern Med</source>
          <year>2005</year>
          <month>12</month>
          <day>06</day>
          <volume>143</volume>
          <issue>11</issue>
          <fpage>785</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-143-11-200512060-00006</pub-id>
          <pub-id pub-id-type="medline">16330789</pub-id>
          <pub-id pub-id-type="pii">143/11/785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bielinski</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wagholikar</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Jonnalagadda</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Ravikumar</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Kullo</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>An information extraction framework for cohort identification using electronic health records</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2013</year>
          <volume>2013</volume>
          <fpage>149</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24303255"/>
          </comment>
          <pub-id pub-id-type="medline">24303255</pub-id>
          <pub-id pub-id-type="pmcid">PMC3845757</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mitchell-Box</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>KL</given-names>
            </name>
          </person-group>
          <article-title>Fathers' thoughts on breastfeeding and implications for a theory-based intervention</article-title>
          <source>J Obstet Gynecol Neonatal Nurs</source>
          <year>2012</year>
          <volume>41</volume>
          <issue>6</issue>
          <fpage>E41</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1552-6909.2012.01399.x</pub-id>
          <pub-id pub-id-type="medline">22861175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harkema</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Thornblade</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
          </person-group>
          <article-title>ConText: an algorithm for determining negation, experiencer, and temporal status from clinical reports</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>10</month>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>839</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(09)00074-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2009.05.002</pub-id>
          <pub-id pub-id-type="medline">19435614</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(09)00074-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC2757457</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>ConText: An algorithm for identifying contextual features from clinical text</article-title>
          <source>Proceedings of the Workshop on BioNLP 2007: Biological, Translational, and Clinical Language Processing</source>
          <year>2007</year>
          <access-date>2020-07-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/1572392.1572408">https://dl.acm.org/doi/10.5555/1572392.1572408</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>WP</given-names>
            </name>
          </person-group>
          <article-title>SMOTE: Synthetic Minority Over-sampling Technique</article-title>
          <source>JAIR</source>
          <year>2002</year>
          <month>06</month>
          <day>01</day>
          <volume>16</volume>
          <issue>6</issue>
          <fpage>321</fpage>
          <lpage>357</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
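            <name name-style="western">
              <surname>Liaw</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wiener</surname>
              <given-names>M</given-names>
            </name>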
          </person-group>
          <article-title>Classification and regression by randomForest</article-title>
          <source>R news</source>
          <year>2002</year>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>18</fpage>
          <lpage>22</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frank</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Witten</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>The WEKA Workbench</article-title>
          <source>Online Appendix for Data Mining: Practical Machine Learning Tools and Techniques</source>
          <year>2016</year>
          <month>12</month>
          <day>01</day>
          <edition>4th edition</edition>
          <publisher-loc>Burlington, Massachusetts</publisher-loc>
          <publisher-name>Morgan Kaufmann</publisher-name>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
