<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i11e28946</article-id>
      <article-id pub-id-type="pmid">34751659</article-id>
      <article-id pub-id-type="doi">10.2196/28946</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using Artificial Intelligence With Natural Language Processing to Combine Electronic Health Record’s Structured and Free Text Data to Identify Nonvalvular Atrial Fibrillation to Decrease Strokes and Death: Evaluation and Case-Control Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Oyama</surname>
            <given-names>Hiroshi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sarajlic</surname>
            <given-names>Philip</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Bhatia</surname>
            <given-names>Sugandh</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Elkin</surname>
            <given-names>Peter L</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>University at Buffalo</institution>
            <addr-line>77 Goodell St</addr-line>
            <addr-line>Suite 5t40</addr-line>
            <addr-line>Buffalo, NY, 14203</addr-line>
            <country>United States</country>
            <phone>1 5073581341</phone>
            <email>elkinp@buffalo.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9616-6811</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Mullin</surname>
            <given-names>Sarah</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5246-2202</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Mardekian</surname>
            <given-names>Jack</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4851-5161</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Crowner</surname>
            <given-names>Christopher</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4735-321X</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Sakilay</surname>
            <given-names>Sylvester</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6684-6311</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Sinha</surname>
            <given-names>Shyamashree</given-names>
          </name>
          <degrees>MSc, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3279-6387</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Brady</surname>
            <given-names>Gary</given-names>
          </name>
          <degrees>DPH</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2833-1333</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Wright</surname>
            <given-names>Marcia</given-names>
          </name>
          <degrees>PharmD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4243-9738</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Nolen</surname>
            <given-names>Kimberly</given-names>
          </name>
          <degrees>PharmD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8821-254X</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Trainer</surname>
            <given-names>JoAnn</given-names>
          </name>
          <degrees>PharmD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4050-4249</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Koppel</surname>
            <given-names>Ross</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8235-9900</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Schlegel</surname>
            <given-names>Daniel</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3922-3231</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Kaushik</surname>
            <given-names>Sashank</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9060-4243</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Jane</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1664-2035</ext-link>
        </contrib>
        <contrib id="contrib15" contrib-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Buer</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4870-523X</ext-link>
        </contrib>
        <contrib id="contrib16" contrib-type="author">
          <name name-style="western">
            <surname>Anand</surname>
            <given-names>Edwin</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1594-8043</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>University at Buffalo</institution>
        <addr-line>Buffalo, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Bioinformatics Laboratory</institution>
        <institution>Department of Veterans Affairs</institution>
        <institution>VA Western New York Healthcare System</institution>
        <addr-line>Buffalo, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Engineering</institution>
        <institution>University of Southern Denmark</institution>
        <addr-line>Odense</addr-line>
        <country>Denmark</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Pfizer, Inc.</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Peter L Elkin <email>elkinp@buffalo.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>9</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>11</issue>
      <elocation-id>e28946</elocation-id>
      <history>
        <date date-type="received">
          <day>19</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>5</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>7</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Peter L Elkin, Sarah Mullin, Jack Mardekian, Christopher Crowner, Sylvester Sakilay, Shyamashree Sinha, Gary Brady, Marcia Wright, Kimberly Nolen, JoAnn Trainer, Ross Koppel, Daniel Schlegel, Sashank Kaushik, Jane Zhao, Buer Song, Edwin Anand. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 09.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/11/e28946" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Nonvalvular atrial fibrillation (NVAF) affects almost 6 million Americans and is a major contributor to stroke but is significantly undiagnosed and undertreated despite explicit guidelines for oral anticoagulation.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study is to investigate whether the use of semisupervised natural language processing (NLP) of electronic health record’s (EHR) free-text information combined with structured EHR data improves NVAF discovery and treatment and perhaps offers a method to prevent thousands of deaths and save billions of dollars.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We abstracted 96,681 participants from the University at Buffalo faculty practice’s EHR. NLP was used to index the notes and compare the ability to identify NVAF, congestive heart failure, hypertension, age ≥75 years, diabetes mellitus, stroke or transient ischemic attack, vascular disease, age 65 to 74 years, sex category (CHA<sub>2</sub>DS<sub>2</sub>-VASc), and Hypertension, Abnormal liver/renal function, Stroke history, Bleeding history or predisposition, Labile INR, Elderly, Drug/alcohol usage (HAS-BLED) scores using structured data (International Classification of Diseases codes) versus structured and unstructured data from clinical notes. In addition, we analyzed data from 63,296,120 participants in the Optum and Truven databases to determine the NVAF frequency, rates of CHA<sub>2</sub>DS<sub>2</sub>‑VASc ≥2, and no contraindications to oral anticoagulants, rates of stroke and death in the untreated population, and first year’s costs after stroke.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The structured-plus-unstructured method would have identified 3,976,056 additional true NVAF cases (<italic>P</italic>&#60;.001) and improved sensitivity for CHA<sub>2</sub>DS<sub>2</sub>-VASc and HAS-BLED scores compared with the structured data alone (<italic>P</italic>=.002 and <italic>P</italic>&#60;.001, respectively), causing a 32.1% improvement. For the United States, this method would prevent an estimated 176,537 strokes, save 10,575 lives, and save &#62;US $13.5 billion.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Artificial intelligence–informed bio-surveillance combining NLP of free-text information with structured EHR data improves data completeness, prevents thousands of strokes, and saves lives and funds. This method is applicable to many disorders with profound public health consequences.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>afib</kwd>
        <kwd>atrial fibrillation</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>NVAF</kwd>
        <kwd>natural language processing</kwd>
        <kwd>stroke risk</kwd>
        <kwd>bleed risk</kwd>
        <kwd>CHA2DS2-VASc</kwd>
        <kwd>HAS-BLED</kwd>
        <kwd>bio-surveillance</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Atrial fibrillation (AF), the most common type of arrhythmia [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>], consists of nonvalvular AF (NVAF) and valvular AF (VAF) [<xref ref-type="bibr" rid="ref1">1</xref>]. NVAF comprises approximately 70% of AF and currently affects approximately 5.8 million US patients and approximately 11 million in Europe. NVAF results in a five times greater risk of stroke [<xref ref-type="bibr" rid="ref3">3</xref>] and causes approximately 15% of all strokes [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Anticoagulation treatment dramatically reduces one’s odds of a stroke to &#60;0.5% on average.</p>
        <p>The incidence of stroke with AF has prompted the development of scoring risk systems to guide anticoagulation treatment [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. In 2014, the American Heart Association, American College of Cardiology, and Heart Rhythm Society advocated for AF practice guidelines via the use of congestive heart failure, hypertension, age ≥ 75 years, diabetes mellitus, stroke or transient ischemic attack, vascular disease, age 65 to 74 years, sex category (CHA<sub>2</sub>DS<sub>2</sub>‑VASc) scores that combine the CHADS<sub>2</sub> score with additional moderate risk factors [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Individuals’ stroke risks should inform therapeutic options, which may include anticoagulants [<xref ref-type="bibr" rid="ref7">7</xref>]. The Hypertension, Abnormal liver/renal function, Stroke history, Bleeding history or predisposition, Labile INR, Elderly, Drug/alcohol usage (HAS‑BLED) score is a practical tool to assess individuals’ risk of major bleeding and to guide anticoagulant therapy [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Researchers posit that the assessment of bleeding risk factors—age, uncontrolled hypertension, ischemic heart disease, and prior ischemic stroke—may improve individualized treatment for AF.</p>
        <p>However, despite strong recommendations, oral anticoagulation (OAC) for NVAF patients remains low, with rates ranging from 39% to 65% [<xref ref-type="bibr" rid="ref10">10</xref>]. Disease surveillance and clinical decision support could help detect potential candidates who could benefit from this therapy. Automatic extraction from electronic health records (EHRs) has been shown to aid health care providers by making health care information easily accessible and helping with risk calculation [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Using these tools could reduce clinicians’ computer time for data retrieval and data entry and could facilitate capturing all qualifying patients [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
      </sec>
      <sec>
        <title>The Need for Natural Language Processing</title>
        <p>Although EHRs contain an abundance of codified information, factors related to the assessment of NVAF are often poorly reflected in structured data [<xref ref-type="bibr" rid="ref11">11</xref>]. Clinical text harboring rich contextual medical information is unstructured and in free-text form. Extracting information from a clinical text remains challenging because of context-specific abbreviations, refusal to adhere to typical language conventions, and because text often includes a broad range of specific medical terms. To retrieve information from a clinical text, multiple natural language processing (NLP) approaches have been developed, including those that extract clinical entities and map them to clinical terminologies such as SNOMED CT (Systematized Nomenclature of Medicine–Clinical Terms) [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <p>To capture all potential patients with NVAF and a CHA<sub>2</sub>DS<sub>2</sub>‑VASc score &#62;1 who would benefit from appropriate anticoagulation therapy, we developed a method to automate risk scoring systems using a combination of multiple EHR data sources for diagnostic information, namely the International Classification of Diseases (ICD) codes and clinical notes and lists. As natural language processors are expensive to develop and require individual tuning for each task or disease area, we make use of a high definition-NLP (HD-NLP) method that uses semisupervised learning to surpass the classification performance that could be obtained either by discarding the unlabeled data and performing supervised learning or by discarding the labels and performing unsupervised learning [<xref ref-type="bibr" rid="ref15">15</xref>]. We compare the advantages of using NLP tools for NVAF phenotyping and risk score calculation with those of using structured ICD data alone.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>This study compares the effectiveness of identifying NVAF patients using three methods: (1) structured EHR data, (2) a combination of structured EHR data and NLP-analyzed existing free text (EHR notes, problem lists, and laboratories), and (3) clinicians’ assessments of NVAF patients (<italic>the gold standard</italic>). We used NLP of the EHRs’ free text to improve the identification of NVAF patients and to assess their stroke and bleeding risks more accurately. We verified the improvement in the identification of NVAF cases and in determining the CHA<sub>2</sub>DS<sub>2</sub>-VASc and HAS-BLED scores. We then examined the rates of NVAF and treatment in patients with a CHA<sub>2</sub>DS<sub>2</sub>-VASc of ≥2 and no contraindications to treatment to determine the results from our local population. Finally, we extrapolated our findings on NVAF numbers to the US population and disease costs.</p>
      <sec>
        <title>Study Populations</title>
        <p>We had two samples: a local Western New York population of 96,681 individuals and 63,296,120 participants from the Optum and Truven databases.</p>
        <sec>
          <title>Sample 1: Local</title>
          <p>To understand the effectiveness of the system in identifying NVAF patients who should be treated and are not currently on OAC therapy, we abstracted a set of 96,681 participants (aged 18-90 years) from the Allscripts outpatient electronic records at the University at Buffalo’s (UBMD) faculty practice. The research was approved by the institutional review board of the University at Buffalo.</p>
          <p>Patient data were abstracted from 2010 to September 21, 2015, before the switch to ICD-10, allowing consistent use of ICD-9 terminology and sufficient follow-up data for the study period. This yielded 212,343 patients. Of those 212,343 patients, 96,681 (45.53%) had notes and were seen for ≥1 outpatient visits (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Figure S1). Outcomes from these data included rates of AF, NVAF, and VAF diagnosis, components of the CHA<sub>2</sub>DS<sub>2</sub>-VASc and HAS-BLED scores, relevant contraindications, OAC treatment, and demographic variables. We excluded patients if they were on oral antithrombotic therapy for indications other than NVAF, had a mechanical prosthetic valve, had a hemodynamically significant mitral stenosis or significant aortic stenosis, were pregnant, had a transient AF because of reversible conditions, or had active infective endocarditis (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Figure S2). We developed the NVAF cohort using ICD-9 codes (structured data) and ICD-9 and NLP (structured-plus-unstructured) of EHR notes and patient problems. AF and atrial flutter were defined by ICD-9 codes 427.31 and 427.32 and by SNOMED CT codes 49436004 and 5370000 with all subtypes in the hierarchy.</p>
          <p>The structured data–only method used ICD-9 codes from problem lists, medications, and demographics. The structured-plus-unstructured method added clinical notes, vital signs, laboratory findings, and text from the problem list using HD-NLP for codification [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. Free text elements were coded using SNOMED CT, a general description logic–based nomenclature of clinical medicine. Specific code inclusions can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Figure S3.</p>
          <p>We then compared the accuracy of structured data alone with the structured-plus-unstructured EHR data derived using the HD-NLP system, focusing on the two models’ abilities to identify true cases of NVAF and to determine stroke and bleeding risks (CHA<sub>2</sub>DS<sub>2</sub>-VASc and HAS-BLED scores).</p>
        </sec>
        <sec>
          <title>Subsample of the Local Data</title>
          <p>For validation of the accuracy of NLP, we used a gold standard created by human review (BS, JZ, EA, and SS) from a random sample of 300 patients. To verify the NVAF identification and CHA<sub>2</sub>DS<sub>2</sub>-VASc and HAS-BLED scores, we used this 300-patient random sample from our NVAF patients, which were dual human reviewed. We also looked to determine how much better structured-plus-unstructured data were in the identification of NVAF cases and in the determination of the CHA<sub>2</sub>DS<sub>2</sub>-VASc and HAS-BLED scores.</p>
          <p>The human review data set was independently examined by 4 clinicians, each performing 150 reviews on deidentified patient encounters from the EHR. Each clinician made a judgment as to whether the patients had sustained NVAF and whether the patient had each of the components of the CHA<sub>2</sub>DS<sub>2</sub>‑VASc and HAS-BLED scores. If there were disagreements, a fifth clinician adjudicated.</p>
          <p>Calculations determined that 300 patients were needed for 90% power to predict a 5% change in accuracy given a two-sided alpha of .05, assuming a standard accuracy of 73% based on ICD-9 codes [<xref ref-type="bibr" rid="ref19">19</xref>]. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Figure S1 presents the decision tree and sample numbers, and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Figure S2 illustrates the randomization scheme.</p>
        </sec>
        <sec>
          <title>Sample 2: National—Optum and Truven Databases</title>
          <p>We analyzed the claims data from 63,296,120 participants in the Optum and Truven databases from October 2015 to September 2016 to determine the frequency of NVAF, rates of CHA<sub>2</sub>DS<sub>2</sub>‑VASc ≥2, and no contraindications to OAC, rates of stroke and death in the untreated NVAF, strokes and death in the large claims database, and the first year’s cost after stroke [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Cost differences were based on 1-year cost before and after the stroke, adjusted for inflation.</p>
          <p>We then extrapolated our findings to the US population.</p>
        </sec>
      </sec>
      <sec>
        <title>Findings for NLP</title>
        <p>We made use of an HD-NLP to rapidly assign ontological terms to the text in patient records (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Figure S5) [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. HD-NLP is a full-function NLP processing pipeline that takes sentences, parses them by their parts of speech, and builds a full semantic parse in memory; then, an ontological coder works by matching words to ontology terms, with the longest match being preferred. We used basic formal ontology as an upper-level ontology to index the data from individual trials [<xref ref-type="bibr" rid="ref18">18</xref>]. We also used the ontology of biomedical investigation and SNOMED CT as our main ontologies [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>].</p>
        <p>A level of syntactic processing was required to match text with ontological terms. The linguistic representation is specified in language models. Of primary concern here was an English language model to identify sentences, phrases, words, and parts of speech. Terms from the input ontologies were then assigned to spans of text. String matching techniques allowed for inexact matches influenced by the underlying language model. The structures of the free‑text medical records were captured and stored.</p>
        <p>To develop the NVAF model, we used a semisupervised learning algorithm training set with 36,268 patients from the Allscripts EHR UBMD faculty practice data from 2007 to 2008, with 1972 AF cases and 1795 NVAF cases to determine the best SNOMED CT codes to match the case definition. As most clinical texts are unlabeled, semisupervised learning leverages a small amount of labeled data with a large amount of unlabeled data. Researchers have shown that large amounts of unlabeled data, when used in conjunction with a limited amount of labeled data, can produce considerable improvement in learning accuracy, especially with assistance from subject matter expert’s annotation of the training set’s false positive and false negative results from each training iteration [<xref ref-type="bibr" rid="ref14">14</xref>]. All cases were coded using HD-NLP with SNOMED CT codes (the unsupervised portion of the study). Where the SNOMED CT codes and ICD-9 codes agreed that the patient had NVAF, we called that a true positive case. The same logic was used to determine true negatives. Where the two coding systems disagreed, our clinician (PE) reviewed the case and decided. After reviewing the false positive and false negative cases from the training data set, we added synonymy to the terminology and selected a more appropriate set of codes for each rule in the definition. This process was iterated on the training set until we met our accuracy goals.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>Statistical analyses were conducted using R 3.3.2. A random gold standard sample of 300 patients was taken from the sample 1 AF cohort defined by both ICD and HD-NLP. Interrater agreement was assessed using the two-way random effects model for intraclass correlation coefficient, with two-sided 10,000 samples bootstrapped 95% CI, treating the risk scores as continuous. Cohen κ with two-sided 10,000 samples bootstrapped 95% CI assessed the interrater reliability of each individual component of the scores, NVAF and AF.</p>
        <p>The accuracy of the structured data alone was compared with structured-plus-unstructured data for the outcomes of NVAF, CHA<sub>2</sub>DS<sub>2</sub>-VASc score, and HAS-BLED score in the random sample. Cohen κ with two-sided bootstrapped CIs was calculated as a measure of reliability between the gold standard and the structured and structured-plus-unstructured data. For sensitivity and specificity, a hypothesis test comparing structured with structured-plus-unstructured data was assessed using either the McNemar test for paired observations or the binomial exact test. For positive and negative predictive values, a generalized score statistic proposed by Leisenring et al [<xref ref-type="bibr" rid="ref19">19</xref>] was used for comparison.</p>
        <p>As the CHA<sub>2</sub>DS<sub>2</sub>-VASc and HAS-BLED scores are on ordinal scales from 0 to 9, we analyzed the area under the receiver operating characteristic (ROC) curve using the C-Index and Somers D, based on ordinal logistic regression, where probabilities were modelled as <italic>P(Y≥k&#124;X)</italic>, where k defines the cut-offs from 0 to 9 that the score can take. We hypothesized that the structured and NLP data were more concordant than the structured-only data compared with the gold standard between the ordinal gold standard score and the ordinal method score.</p>
        <p>We contrasted our findings with the clinical judgments from the physician review of the 300 patients, categorized as contraindicated (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Table S1) or not on OAC, would or would not benefit from OAC, and not on OAC. To determine the potential effects of adopting the NLP-enabled method with structured-plus-unstructured data, the accuracy data of the structured and NLP data method were used to extrapolate the findings for all untreated US patients in the Optum and Truven data sets with no contraindications to OACs. Then, the potential savings from reduced strokes were derived and compared with the prevailing structured-only method.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>NLP Results</title>
        <p>From the Allscripts UBMD practice EHR data, we found 2722 potential patients with NVAF using the structured and NLP method and 1849 cases using only ICD-9 codes. The use of NLP by combining structured-plus-unstructured data improved sensitivity by 32.1%, that is, 873/2722 (<italic>P</italic>&#60;.001) in determining the NVAF population. In the random sample, participants were on average 72 years old (mean 72.7, SD 13.6), 41.3% (125/300) were female, and 86.3% (259/300) were White. The true NVAF population within the random sample, as determined by clinician review, was 88% (264/300) of cases with an average age of 73 (mean 73.4, SD 13.0), of which 41.7% (110/264) were female, and 87.1% (230/264) were White. The assessment of agreement between clinicians and interrater reliability was high for the CHA<sub>2</sub>DS<sub>2</sub>-VASc score (intraclass correlation coefficient [ICC] 0.796, 95% CI 0.725-0.853 and ICC 0.878, 95% CI 0.838-0.909) and adequate for the HAS-BLED score (ICC 0.609, 95% CI 0.51-0.692 and ICC 0.675, 95% CI 0.544-0.77). Cohen κ, depending on whether an outcome was a rare event, ranged from –0.080 to 0.84.</p>
        <p>When we tested this in the human review of the 300 cases, we found a 46% improvement in sensitivity (<xref ref-type="table" rid="table1">Table 1</xref>), which is greater than the 32.1% improvement seen with the automated method.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Clinician review (gold standard): comparison of outcomes for structured and structured-plus-unstructured data against the gold standard for identifying a case as nonvalvular atrial fibrillation.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="380"/>
            <col width="270"/>
            <col width="270"/>
            <col width="80"/>
            <thead>
              <tr valign="bottom">
                <td>Outcome</td>
                <td>Structured surveillance</td>
                <td>Structured and NLP<sup>a</sup> surveillance</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Sensitivity, OR<sup>b</sup> (95% CI)</td>
                <td>0.54 (0.48-0.60)</td>
                <td>1 (0.979-1)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>PPV<sup>c</sup>, OR (95% CI)</td>
                <td>0.95 (0.90-0.98)</td>
                <td>0.93 (0.893-0.956)</td>
                <td>.24</td>
              </tr>
              <tr valign="top">
                <td>F<sup>d</sup> score</td>
                <td>0.686</td>
                <td>0.964</td>
                <td>N/A<sup>e</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>OR: odds ratio.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>PPV: positive predictive value.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>For case finding of nonvalvular atrial fibrillation.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Thus, the structured-plus-unstructured surveillance showed that the sensitivity for CHA<sub>2</sub>DS<sub>2</sub>-VASc ≥2 and HAS-BLED≥3 scores was significantly better than that for structured data alone (<italic>P</italic>=.002 and <italic>P</italic>&#60;.001, respectively). The specificities of the two methods were not statistically different for CHA<sub>2</sub>DS<sub>2</sub>-VASc and favored the structured method for HAS-BLED (<xref ref-type="table" rid="table2">Table 2</xref>). The positive predictive value (PPV; precision) also improved for the HAS-BLED score using the structured-plus-unstructured method (<xref ref-type="table" rid="table2">Table 2</xref>) but was not statistically different from the structured data for the CHA<sub>2</sub>DS<sub>2</sub>-VASc score. However, the negative predictive value improved for both scores using the structured-plus-unstructured method. No cases identified by the structured method were missed by the structured-plus-unstructured method.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Comparison of outcomes for structured and structured-plus-unstructured surveillance against the clinician review (gold standard) for identifying Hypertension, Abnormal liver/renal function, Stroke history, Bleeding history or predisposition, Labile INR, Elderly, Drug/alcohol usage (HAS-BLED) and congestive heart failure, hypertension, age ≥75 years, diabetes mellitus, stroke or transient ischemic attack, vascular disease, age 65 to 74 years, sex category (CHA2DS2-VASc) components.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="140"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="90"/>
            <col width="0"/>
            <col width="60"/>
            <col width="0"/>
            <col width="70"/>
            <col width="0"/>
            <col width="0"/>
            <col width="90"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="90"/>
            <col width="0"/>
            <col width="60"/>
            <col width="0"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Method</td>
                <td colspan="11">HAS-BLED</td>
                <td colspan="9">CHA<sub>2</sub>DS<sub>2</sub>-VASc</td>
              </tr>
              <tr valign="bottom">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Structured surveillance</td>
                <td colspan="2">Structured and NLP<sup>a</sup> surveillance</td>
                <td colspan="2">Difference</td>
                <td colspan="2">Test statistic</td>
                <td colspan="2"><italic>P</italic> value</td>
                <td colspan="3">Structured surveillance</td>
                <td colspan="2">Structured and NLP surveillance</td>
                <td colspan="2">Difference</td>
                <td colspan="2">Test statistic</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="23">
                  <bold>Sensitivity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>McNemar method</td>
                <td colspan="2">0.382</td>
                <td colspan="2">0.806</td>
                <td colspan="2">0.424</td>
                <td colspan="2">72</td>
                <td colspan="2">&#60;.001</td>
                <td colspan="3">—<sup>b</sup></td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Exact binomial method</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="3">0.942</td>
                <td colspan="2">0.983</td>
                <td colspan="2">0.0413</td>
                <td colspan="2">—</td>
                <td colspan="2">.002</td>
              </tr>
              <tr valign="top">
                <td colspan="23">
                  <bold>Specificity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>McNemar method</td>
                <td colspan="2">0.947</td>
                <td colspan="2">0.777</td>
                <td colspan="2">–0.17</td>
                <td colspan="2">16</td>
                <td colspan="2">&#60;.001</td>
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Exact binomial method</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
                <td colspan="3">0.955</td>
                <td colspan="2">0.909</td>
                <td colspan="2">–0.0455</td>
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">&#62;.99<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="23">
                  <bold>PPV<sup>d</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Generalized score method</td>
                <td colspan="2">0.929</td>
                <td colspan="2">0.867</td>
                <td colspan="2">0.061</td>
                <td colspan="2">4.487</td>
                <td colspan="2">.03</td>
                <td colspan="3">0.996</td>
                <td colspan="2">0.992</td>
                <td colspan="2">0.004</td>
                <td colspan="2">0.915</td>
                <td colspan="2">.34</td>
              </tr>
              <tr valign="top">
                <td colspan="23">
                  <bold>NPV<sup>e</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Generalized score method</td>
                <td colspan="2">0.459</td>
                <td colspan="2">0.689</td>
                <td colspan="2">0.23</td>
                <td colspan="2">47.757</td>
                <td colspan="2">&#60;.001</td>
                <td colspan="3">0.6</td>
                <td colspan="2">0.833</td>
                <td colspan="2">0.233</td>
                <td colspan="2">11.662</td>
                <td colspan="2">&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>There is a small number of discordant cells, such that for the gold standard’s CHA<sub>2</sub>DS<sub>2</sub>-VASc &#60;2, there is 1 case that was identified as CHA<sub>2</sub>DS<sub>2</sub>-VASc ≥2 in the structured and NLP method but not in the structured method. The exact binomial <italic>P</italic> value is calculated as <inline-graphic xlink:href="jmir_v23i11e28946_fig2.png" xlink:type="simple" mimetype="image"/></p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>There is a small number of discordant cells, such that for the gold standard’s CHA<sub>2</sub>DS<sub>2</sub>-VASc &#60;2, there is 1 case that was identified as CHA<sub>2</sub>DS<sub>2</sub>-VASc ≥2 in the structured and NLP method but not in the structured method. The exact binomial <italic>P</italic> value is calculated as <inline-graphic xlink:href="jmir_v23i11e28946_fig2.png" xlink:type="simple" mimetype="image"/></p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>PPV: positive predictive value.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>NPV: negative predictive value.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p><xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Figure S4 presents the conditional probability tree for the automated structured or structured-plus-NLP method, based on clinical guidelines.</p>
        <p>In <xref rid="figure1" ref-type="fig">Figure 1</xref>, the area under the ROC for the CHA<sub>2</sub>DS<sub>2</sub>-VASc scores for the structured-plus-unstructured data compared with the gold standard score was 0.914 (95% CI 0.896-0.933) with a Somers D of 0.829 (SD 0.0185), and for the structured data alone compared with the gold standard score, was 0.863 (95% CI 0.838-0.887), with a Somers D of 0.726 (SD 0.0249). For CHA<sub>2</sub>DS<sub>2</sub>-VASc scores, structured-plus-unstructured data were more concordant than structured data alone when compared with the gold standard score (Z=19.77; <italic>P</italic>&#60;.001). For the ROC curves of the HAS-BLED scores with the gold standard score as the outcome, the area under the ROC for the structured-plus-unstructured data was 0.816 (95% CI 0.783-0.849), with a Somers D of 0.633 (SD 0.034), and that for the structured data alone was 0.797 (95% CI 0.761-0.833) with a Somers D of 0.595 (SD 0.037). For HAS-BLED scores, structured-plus-unstructured data were not more concordant than structured data alone (Z=1.433; <italic>P</italic>=.149).</p>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> represents four areas under ROC curves, two for structured versus structured and NLP CHA<sub>2</sub>DS<sub>2</sub>-VASc score and two for structured versus structured and NLP HAS-BLED score. As these scores are ordinal (eg, ranging from 0-9) and not binary, as with typical ROC, we used the C-Index and Somers D based on ordinal logistic regression to model the probabilities, resulting in multiple y values for the same x.</p>
        <p>We compared the findings of the gold standard with the NLP structured-plus-unstructured data (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Table S1). Clinician reviewers found 31 untreated patients who should have been treated and 1 treated patient who, the clinicians felt, should not have been treated. This was the same total as that of the gold standard. After clinician review, there was a 32.1% improvement in PPV using the structured-plus-unstructured method when compared with the structured method alone.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Four receiver operator characteristic curves for cumulative congestive heart failure, hypertension, age ≥ 75 years, diabetes mellitus, stroke or transient ischemic attack, vascular disease, age 65 to 74 years, sex category (CHA2DS2-VASc), and Hypertension, Abnormal liver/renal function, Stroke history, Bleeding history or predisposition, Labile INR, Elderly, Drug/alcohol usage (HAS-BLED) risk scores. NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="jmir_v23i11e28946_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Extrapolating Findings to the US Population for Prevalence and Cost</title>
        <p>Extrapolation to the US population of the Truven and Optum data results can be found in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <p>To determine the national cost savings from the NLP-assisted bio-surveillance of the structured-plus-unstructured data, we used Truven data and contrasted the mean monthly costs per patient after a stroke (US $11,538) with the monthly costs before a stroke (US $2,763.33), which yielded a mean savings of US $8,776.02. This was adjusted to 2019 US dollars as the data were from 2010 to 2015. This revealed savings of US $8,556.66 per month or yearly savings of US $102,680.</p>
        <p>The structured data method identified 1.5% (967,801/63,296,120) of the population as having NVAF. Of those cases, 84.3% (816,240/967,801) had a CHA<sub>2</sub>DS<sub>2</sub>-VASc score of ≥2. These data indicate that 60.7% (495,749/816,240) of these patients were not treated despite the current clinical guidelines. Untreated NVAF patients had a 4.4% (22,021/495,749) annual ischemic stroke risk, and the stroke patients had a 6.0% (1320/22,021) risk of death.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Optum and Truven stroke data for 1 year after atrial fibrillation (AF) diagnosis.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="450"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Population for rates</td>
                <td>Truven, n (%)</td>
                <td>Optum, n (%)</td>
                <td>Total, n (%)</td>
                <td>Event rates (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>All patients</td>
                <td>32,046,193 (50.63)</td>
                <td>31,249,927 (49.37)</td>
                <td>63,296,120 (100)</td>
                <td>—<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>Patients aged ≥18 years in 2016 with any diagnosis of AF during October 2015-September 2016</td>
                <td>422,092 (32.79)</td>
                <td>865,072 (67.21)</td>
                <td>1,287,164 (100)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Patients aged ≥18 years in 2016 with any diagnosis of AF during October 2015-September 2016 and without a VHD<sup>b</sup> diagnosis during 1-year preindex</td>
                <td>355,811 (36.76)</td>
                <td>611,990 (63.24)</td>
                <td>967,801 (100)</td>
                <td>1.5</td>
              </tr>
              <tr valign="top">
                <td>Patients aged ≥18 years in 2016 with any diagnosis of AF during October 2015-September 2016 and without VHD diagnosis during 1-year preindex and with CHA<sub>2</sub>DS<sub>2</sub>‑VASc<sup>c</sup> ≥2 and no contraindications to OAC<sup>d</sup></td>
                <td>276,465 (33.87)</td>
                <td>539,775 (66.13)</td>
                <td>816,240 (100)</td>
                <td>84.3</td>
              </tr>
              <tr valign="top">
                <td>Patients aged ≥18 years in 2016 with any diagnosis of AF during October 2015-September 2016 and without VHD diagnosis during 1-year preindex and with CHA<sub>2</sub>DS<sub>2</sub>‑VASc ≥2 and no contraindications to OAC and were untreated</td>
                <td>179,441 (36.20)</td>
                <td>316,308 (63.80)</td>
                <td>495,749 (100)</td>
                <td>60.7</td>
              </tr>
              <tr valign="top">
                <td>Stroke rate</td>
                <td>11,530 (52.36)</td>
                <td>10,491 (47.64)</td>
                <td>22,021 (100)</td>
                <td>4.4</td>
              </tr>
              <tr valign="top">
                <td>Death rate</td>
                <td>727 (55.1)</td>
                <td>593 (44.9)</td>
                <td>1,320 (100)</td>
                <td>5.99</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>The values are not events.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>VHD: valvular heart disease.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>CHA<sub>2</sub>DS<sub>2</sub>‑VASc: congestive heart failure, hypertension, age ≥ 75 years, diabetes mellitus, stroke or transient ischemic attack, vascular disease, age 65 to 74 years, sex category.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>OAC: oral anticoagulation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Estimates of Morbidity, Mortality, and Cost</title>
        <p>After extrapolating our results combining the Optum and Truven data with our method of bio-surveillance, we estimated outcomes of implementing the NLP-assisted analyses of structured-plus-unstructured data nationally; that is, if implemented nationally (among a population of 316,005,000), this system could potentially prevent 176,537 strokes and 10,575 deaths in the first year of implementation, with stroke-associated savings &#62;US $18.126 billion (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Untreated strokes and their costs for first year after the event.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="460"/>
            <col width="180"/>
            <col width="180"/>
            <col width="180"/>
            <thead>
              <tr valign="bottom">
                <td>Extrapolated results</td>
                <td>Structured surveillance</td>
                <td>Structured and NLP<sup>a</sup> surveillance</td>
                <td>Difference between the two methods</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>NVAF<sup>b</sup> population</td>
                <td>4,955,284</td>
                <td>6,545,930</td>
                <td>1,590,646</td>
              </tr>
              <tr valign="top">
                <td>NVAF population with no contraindications and CHA<sub>2</sub>DS<sub>2</sub>-VASc<sup>c</sup> ≥2</td>
                <td>4,543,995</td>
                <td>6,002,707</td>
                <td>1,458,712</td>
              </tr>
              <tr valign="top">
                <td>NVAF population needing treatment</td>
                <td>3,009,840</td>
                <td>3,976,057</td>
                <td>966,217</td>
              </tr>
              <tr valign="top">
                <td>Strokes prevented</td>
                <td>133,637</td>
                <td>176,537</td>
                <td>42,900</td>
              </tr>
              <tr valign="top">
                <td>Deaths prevented</td>
                <td>8,005</td>
                <td>10,575</td>
                <td>2,570</td>
              </tr>
              <tr valign="top">
                <td>Cost savings<sup>d</sup> (US $)</td>
                <td>13,721,820,000</td>
                <td>18,126,800,000</td>
                <td>4,404,981,210</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>NVAF: nonvalvular atrial fibrillation.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>CHA<sub>2</sub>DS<sub>2</sub>-VASc: congestive heart failure, hypertension, age ≥ 75 years, diabetes mellitus, stroke or transient ischemic attack, vascular disease, age 65 to 74 years, sex category.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>Cost basis is US $102,680 per untreated ischemic stroke patient's excess cost for the first year after event; cost is 1.9% inflation adjusted.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Compared with structured EHR data alone, we found that NLP-assisted structured-plus-unstructured EHR data identified previously unknown and untreated patients with NVAF and their stroke and bleed risks with greater accuracy. Adding the unstructured data significantly improved the sensitivity and negative predictive value across all measures, whereas the results for NVAF specificity and PPV were strong but mixed. Future applications of this artificial intelligence (AI) bio-surveillance method may involve identifying other underdiagnosed populations.</p>
        <p>We estimated NVAF rates in large national database populations, the percentage of people who should be treated with OAC and are not currently treated, and yearly risks of stroke expressed as a percentage of these untreated patients [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. We also estimated the average incremental 1-year cost for a stroke event and identified stroke-related average death rates in the first year after event.</p>
        <p>Verhoef et al [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>] showed that bleeding rates with warfarin were, on average, 0.34% risk per year. Given additional treatment for 3,976,057 new patients, we would expect 13,824 new patient bleeds. McWilliam [<xref ref-type="bibr" rid="ref28">28</xref>] showed that the average cost of a major bleed was US $19,000 in 2008 (inflation adjusted to US $23,777.67). For the population, this equals US $328,702,452. Gilligan et al [<xref ref-type="bibr" rid="ref29">29</xref>] showed that the average total cost for warfarin therapy was US $76.19 per member per month, which translates to a total national cost of US $3,750,758,790 per year. Potential net financial treatment benefits from using the NLP-assisted structured-plus-unstructured method equate to US $14.4 billion (US $18.13 billion minus US $3.75 billion).</p>
        <p>On the basis of the accuracy of the AI-derived bio-surveillance method, we show potential societal benefits of implementing this technology. Nationally, this method could identify approximately 4 million patients requiring treatment, potentially preventing &#62;176,000 strokes in the first year, and &#62;10,500 deaths, translating to national savings of &#62;US $14 billion. Including the estimated costs of excess bleeding from the treatment and from our estimate, the national implementation costs would be no greater than US $300,000,000. This type of AI-driven clinical decision support bio-surveillance has the potential to significantly improve patient care and clinicians’ treatment decisions.</p>
        <p>NVAF is but one important condition among many. Future applications of this AI bio-surveillance method may identify other underdiagnosed populations. Once deployed, the infrastructure could be used for other disorders and could be implemented at a low incremental cost.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This analysis and data extrapolation were based on previous 2014 American Heart Association, American College of Cardiology, and Heart Rhythm Society recommendations for OAC therapy in patients with NVAF and a CHA<sub>2</sub>DS<sub>2</sub>-VASc score of ≥2. The 2019 focused updates on AF now recommend that men with a CHA<sub>2</sub>DS<sub>2</sub>-VASc score of ≥2 and women with a CHA<sub>2</sub>DS<sub>2</sub>-VASc score of ≥3 should be treated with an OAC. As such, the numbers in this analysis may include women who, under the updated guidance, may not be recommended for treatment with an OAC. In addition, not all patients for whom therapy is indicated may agree to accept anticoagulation therapy.</p>
        <p>The Optum and Truven databases, although found to be effectively nonoverlapping, are, on average, considered to be for younger and healthier private payer populations; therefore, we may underestimate both protective effects and cost savings [<xref ref-type="bibr" rid="ref30">30</xref>]. If this method were extended to other diseases, models must be built and distributed uniformly across the country and perhaps internationally.</p>
        <p>The AI model processes the free text of the notes and reports, and as it can accept and process data from Cerner, Epic, and other EHRs, there should be no difference in outcome; however, this model has not been specifically tested with data from other EHRs.</p>
        <p>ICD-9 codes were used in this study because of the desire to have a consistently coded data set. ICD-10 codes were not included. Future research should investigate this method using later ICD codes.</p>
        <p>This informatics method promises many benefits. Of course, additional research is needed to determine its applicability to other diseases.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Although a common disorder (N=6 million Americans), NVAF is often underprophylaxed for thromboembolic events that may lead to strokes. Critical evidence may be found in patients’ EHRs to aid in anticoagulation decision-making. Stroke rates of untreated patients with a CHA<sub>2</sub>DS<sub>2</sub>‑VASc of ≥2 in our study were 4.44%, and of these, approximately 6% will die within 1 year. Treatment dramatically reduces one’s odds of a stroke to &#60;0.5% on average.</p>
        <p>Our structured-plus-unstructured (NLP) method identified 36.3% additional true NVAF cases (<italic>P</italic>&#60;.001) compared with the structured data alone. Extrapolating to the US population using the 63 million people in the Optum and Truven populations allowed us to predict that in just the first year of implementation of this system, it could prevent 176,537 strokes and 10,575 deaths and save the nation &#62;US $13.5 billion.</p>
        <p>Moreover, this bio-surveillance method and preparedness, in general, may be useful for the discovery and treatment of many other disorders, and require further research with different diseases. Automated tools in partnership with clinicians have the potential to significantly improve adherence to established clinical guidelines and to precision medicine.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Study recruitment diagram and additional analyses.</p>
        <media xlink:href="jmir_v23i11e28946_app1.docx" xlink:title="DOCX File, 6156 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Financial and nonfinancial support.</p>
        <media xlink:href="jmir_v23i11e28946_app2.docx" xlink:title="DOCX File, 14 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AF</term>
          <def>
            <p>atrial fibrillation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CHA<sub>2</sub>DS<sub>2</sub>‑VASc</term>
          <def>
            <p>congestive heart failure, hypertension, age ≥75 years, diabetes mellitus, stroke or transient ischemic attack, vascular disease, age 65 to 74 years, sex category</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HAS-BLED</term>
          <def>
            <p>Hypertension, Abnormal liver/renal function, Stroke history, Bleeding history or predisposition, Labile INR, Elderly, Drug/alcohol usage</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">HD-NLP</term>
          <def>
            <p>high definition natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NIH</term>
          <def>
            <p>National Institutes of Health</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NVAF</term>
          <def>
            <p>nonvalvular atrial fibrillation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">OAC</term>
          <def>
            <p>oral anticoagulation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">OR</term>
          <def>
            <p>odds ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">SNOMED CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine–Clinical Terms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">VAF</term>
          <def>
            <p>valvular atrial fibrillation</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was funded in part by Pfizer, Inc, and grants were received from the National Institutes of Health (NIH)-National Library of Medicine T15LM012495, National Center for Advancing Translational Sciences UL1TR001412, National Institute on Alcohol Abuse and Alcoholism R21AA026954 and R33AA026954, and NIH T32GM099607. This study was funded in part by the National Cancer Institute and the Department of Veterans Affairs through the BD-STEP program. Optum’s deidentified Integrated Claims-Clinical Data set and Truven Health MarketScan Research Databases were used in this study. Partial funding from Pfizer, NIH-NLM-T15LM012495, NCATS-UL1TR001412, NIH-T32GM099607, NCI, and US-VA was received (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>GB, MW, JM, JT, and KM are employed at Pfizer.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Camm</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lip</surname>
              <given-names>GY</given-names>
            </name>
            <name name-style="western">
              <surname>De Caterina</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Savelieva</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Atar</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hohnloser</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Hindricks</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kirchhof</surname>
              <given-names>P</given-names>
            </name>
            <collab>ESC Committee for Practice Guidelines (CPG)</collab>
          </person-group>
          <article-title>2012 focused update of the ESC Guidelines for the management of atrial fibrillation: an update of the 2010 ESC Guidelines for the management of atrial fibrillation. Developed with the special contribution of the European Heart Rhythm Association</article-title>
          <source>Eur Heart J</source>
          <year>2012</year>
          <month>11</month>
          <volume>33</volume>
          <issue>21</issue>
          <fpage>2719</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="doi">10.1093/eurheartj/ehs253</pub-id>
          <pub-id pub-id-type="medline">22922413</pub-id>
          <pub-id pub-id-type="pii">ehs253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>January</surname>
              <given-names>CT</given-names>
            </name>
            <name name-style="western">
              <surname>Wann</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Alpert</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Calkins</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cigarroa</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Cleveland</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Conti</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Ellinor</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Ezekowitz</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Field</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>KT</given-names>
            </name>
            <name name-style="western">
              <surname>Sacco</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Stevenson</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Tchou</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tracy</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Yancy</surname>
              <given-names>CW</given-names>
            </name>
            <collab>American College of Cardiology/American Heart Association Task Force on Practice Guidelines</collab>
          </person-group>
          <article-title>2014 AHA/ACC/HRS guideline for the management of patients with atrial fibrillation: a report of the American College of Cardiology/American Heart Association Task Force on Practice Guidelines and the Heart Rhythm Society</article-title>
          <source>J Am Coll Cardiol</source>
          <year>2014</year>
          <month>12</month>
          <day>02</day>
          <volume>64</volume>
          <issue>21</issue>
          <fpage>1</fpage>
          <lpage>76</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0735-1097(14)01740-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jacc.2014.03.022</pub-id>
          <pub-id pub-id-type="medline">24685669</pub-id>
          <pub-id pub-id-type="pii">S0735-1097(14)01740-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roger</surname>
              <given-names>VL</given-names>
            </name>
            <name name-style="western">
              <surname>Go</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Lloyd-Jones</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Benjamin</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Borden</surname>
              <given-names>WB</given-names>
            </name>
            <name name-style="western">
              <surname>Bravata</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Fullerton</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gillespie</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hailpern</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Heit</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Howard</surname>
              <given-names>VJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kissela</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Kittner</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lackland</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Lichtman</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Lisabeth</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Makuc</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Marelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Matchar</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Moy</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Mozaffarian</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mussolino</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Nichol</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Paynter</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Soliman</surname>
              <given-names>EZ</given-names>
            </name>
            <name name-style="western">
              <surname>Sorlie</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Sotoodehnia</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Turan</surname>
              <given-names>TN</given-names>
            </name>
            <name name-style="western">
              <surname>Virani</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Woo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>MB</given-names>
            </name>
            <collab>American Heart Association Statistics Committee and Stroke Statistics Subcommittee</collab>
          </person-group>
          <article-title>Heart disease and stroke statistics—2012 update: a report from the American Heart Association</article-title>
          <source>Circulation</source>
          <year>2012</year>
          <month>01</month>
          <day>03</day>
          <volume>125</volume>
          <issue>1</issue>
          <fpage>2</fpage>
          <lpage>220</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://circ.ahajournals.org/cgi/pmidlookup?view=long&#38;pmid=22179539"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/CIR.0b013e31823ac046</pub-id>
          <pub-id pub-id-type="medline">22179539</pub-id>
          <pub-id pub-id-type="pii">CIR.0b013e31823ac046</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Abbott</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Kannel</surname>
              <given-names>WB</given-names>
            </name>
          </person-group>
          <article-title>Atrial fibrillation as an independent risk factor for stroke: the Framingham Study</article-title>
          <source>Stroke</source>
          <year>1991</year>
          <month>08</month>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>983</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1161/01.str.22.8.983</pub-id>
          <pub-id pub-id-type="medline">1866765</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gage</surname>
              <given-names>BF</given-names>
            </name>
            <name name-style="western">
              <surname>Waterman</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Shannon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Boechler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rich</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Validation of clinical classification schemes for predicting stroke: results from the National Registry of Atrial Fibrillation</article-title>
          <source>J Am Med Assoc</source>
          <year>2001</year>
          <month>06</month>
          <day>13</day>
          <volume>285</volume>
          <issue>22</issue>
          <fpage>2864</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.285.22.2864</pub-id>
          <pub-id pub-id-type="medline">11401607</pub-id>
          <pub-id pub-id-type="pii">joc01974</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lip</surname>
              <given-names>GY</given-names>
            </name>
            <name name-style="western">
              <surname>Nieuwlaat</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pisters</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lane</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Crijns</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Refining clinical risk stratification for predicting stroke and thromboembolism in atrial fibrillation using a novel risk factor-based approach: The Euro Heart Survey on atrial fibrillation</article-title>
          <source>Chest</source>
          <year>2010</year>
          <month>02</month>
          <volume>137</volume>
          <issue>2</issue>
          <fpage>263</fpage>
          <lpage>72</lpage>
          <pub-id pub-id-type="doi">10.1378/chest.09-1584</pub-id>
          <pub-id pub-id-type="medline">19762550</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(10)60067-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>January</surname>
              <given-names>CT</given-names>
            </name>
            <name name-style="western">
              <surname>Wann</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Calkins</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>LY</given-names>
            </name>
            <name name-style="western">
              <surname>Cigarroa</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Cleveland</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Ellinor</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Ezekowitz</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Field</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Furie</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Heidenreich</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>KT</given-names>
            </name>
            <name name-style="western">
              <surname>Shea</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Tracy</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Yancy</surname>
              <given-names>CW</given-names>
            </name>
          </person-group>
          <article-title>2019 AHA/ACC/HRS focused update of the 2014 AHA/ACC/HRS guideline for the management of patients with atrial fibrillation: a report of the American College of Cardiology/American Heart Association Task Force on Clinical Practice Guidelines and the Heart Rhythm Society in collaboration with the Society of Thoracic Surgeons</article-title>
          <source>Circulation</source>
          <year>2019</year>
          <month>07</month>
          <day>09</day>
          <volume>140</volume>
          <issue>2</issue>
          <fpage>125</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.1161/CIR.0000000000000665</pub-id>
          <pub-id pub-id-type="medline">30686041</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pisters</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lane</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Nieuwlaat</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>de Vos</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Crijns</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lip</surname>
              <given-names>GY</given-names>
            </name>
          </person-group>
          <article-title>A novel user-friendly score (HAS-BLED) to assess 1-year risk of major bleeding in patients with atrial fibrillation: the Euro Heart Survey</article-title>
          <source>Chest</source>
          <year>2010</year>
          <month>11</month>
          <volume>138</volume>
          <issue>5</issue>
          <fpage>1093</fpage>
          <lpage>100</lpage>
          <pub-id pub-id-type="doi">10.1378/chest.10-0134</pub-id>
          <pub-id pub-id-type="medline">20299623</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(10)60585-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lip</surname>
              <given-names>GY</given-names>
            </name>
            <name name-style="western">
              <surname>Frison</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Halperin</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Lane</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Comparative validation of a novel risk score for predicting bleeding risk in anticoagulated patients with atrial fibrillation: the HAS-BLED (Hypertension, Abnormal Renal/Liver Function, Stroke, Bleeding History or Predisposition, Labile INR, Elderly, Drugs/Alcohol Concomitantly) score</article-title>
          <source>J Am Coll Cardiol</source>
          <year>2011</year>
          <month>01</month>
          <day>11</day>
          <volume>57</volume>
          <issue>2</issue>
          <fpage>173</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0735-1097(10)04337-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jacc.2010.09.024</pub-id>
          <pub-id pub-id-type="medline">21111555</pub-id>
          <pub-id pub-id-type="pii">S0735-1097(10)04337-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tiryaki</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Nutescu</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Hennenfent</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Karageanes</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Koesterer</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lambert</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Schumock</surname>
              <given-names>GT</given-names>
            </name>
          </person-group>
          <article-title>Anticoagulation therapy for hospitalized patients: patterns of use, compliance with national guidelines, and performance on quality measures</article-title>
          <source>Am J Health Syst Pharm</source>
          <year>2011</year>
          <month>07</month>
          <day>01</day>
          <volume>68</volume>
          <issue>13</issue>
          <fpage>1239</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.2146/ajhp100543</pub-id>
          <pub-id pub-id-type="medline">21690430</pub-id>
          <pub-id pub-id-type="pii">68/13/1239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Navar-Boggan</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Rymer</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Piccini</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Shatila</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ring</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stafford</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Khatib</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>ED</given-names>
            </name>
          </person-group>
          <article-title>Accuracy and validation of an automated electronic algorithm to identify patients with atrial fibrillation at risk for stroke</article-title>
          <source>Am Heart J</source>
          <year>2015</year>
          <month>01</month>
          <volume>169</volume>
          <issue>1</issue>
          <fpage>39</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ahj.2014.09.014</pub-id>
          <pub-id pub-id-type="medline">25497246</pub-id>
          <pub-id pub-id-type="pii">S0002-8703(14)00615-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>What can natural language processing do for clinical decision support?</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>10</month>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>760</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(09)00108-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2009.08.007</pub-id>
          <pub-id pub-id-type="medline">19683066</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(09)00108-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC2757540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aakre</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dziadzko</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Keegan</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Herasevich</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Automating clinical score calculation within the electronic health record. A feasibility assessment</article-title>
          <source>Appl Clin Inform</source>
          <year>2017</year>
          <month>04</month>
          <day>12</day>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>369</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28401245"/>
          </comment>
          <pub-id pub-id-type="doi">10.4338/ACI-2016-09-RA-0149</pub-id>
          <pub-id pub-id-type="medline">28401245</pub-id>
          <pub-id pub-id-type="pii">2016-09-RA-0149</pub-id>
          <pub-id pub-id-type="pmcid">PMC6241755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elkin</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Froehling</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Wahner-Roedler</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>KR</given-names>
            </name>
          </person-group>
          <article-title>Comparison of natural language processing biosurveillance methods for identifying influenza from encounter notes</article-title>
          <source>Ann Intern Med</source>
          <year>2012</year>
          <month>01</month>
          <day>03</day>
          <volume>156</volume>
          <issue>1 Pt 1</issue>
          <fpage>11</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-156-1-201201030-00003</pub-id>
          <pub-id pub-id-type="medline">22213490</pub-id>
          <pub-id pub-id-type="pii">156/1_Part_1/11</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeung</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Downing</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Fei-Fei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Milstein</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Bedside computer vision - moving artificial intelligence from driver assistance to patient safety</article-title>
          <source>N Engl J Med</source>
          <year>2018</year>
          <month>04</month>
          <day>05</day>
          <volume>378</volume>
          <issue>14</issue>
          <fpage>1271</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp1716891</pub-id>
          <pub-id pub-id-type="medline">29617592</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murff</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>FitzHenry</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Matheny</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Gentry</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kotter</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Crimin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dittus</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Rosen</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Elkin</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Speroff</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Automated identification of postoperative complications within an electronic medical record using natural language processing</article-title>
          <source>J Am Med Assoc</source>
          <year>2011</year>
          <month>08</month>
          <day>24</day>
          <volume>306</volume>
          <issue>8</issue>
          <fpage>848</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2011.1204</pub-id>
          <pub-id pub-id-type="medline">21862746</pub-id>
          <pub-id pub-id-type="pii">306/8/848</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schlegel</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Crowner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lehoullier</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Elkin</surname>
              <given-names>PL</given-names>
            </name>
          </person-group>
          <article-title>HTP-NLP: A new NLP system for high throughput phenotyping</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2017</year>
          <volume>235</volume>
          <fpage>276</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28423797"/>
          </comment>
          <pub-id pub-id-type="medline">28423797</pub-id>
          <pub-id pub-id-type="pmcid">PMC7767581</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birman-Deych</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Waterman</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nilasena</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gage</surname>
              <given-names>BF</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of ICD-9-CM codes for identifying cardiovascular and stroke risk factors</article-title>
          <source>Med Care</source>
          <year>2005</year>
          <month>05</month>
          <volume>43</volume>
          <issue>5</issue>
          <fpage>480</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1097/01.mlr.0000160417.39497.a9</pub-id>
          <pub-id pub-id-type="medline">15838413</pub-id>
          <pub-id pub-id-type="pii">00005650-200505000-00009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leisenring</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Alonzo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Pepe</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Comparisons of predictive values of binary medical diagnostic tests for paired designs</article-title>
          <source>Biometrics</source>
          <year>2000</year>
          <month>06</month>
          <volume>56</volume>
          <issue>2</issue>
          <fpage>345</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.1111/j.0006-341x.2000.00345.x</pub-id>
          <pub-id pub-id-type="medline">10877288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <source>Optum</source>
          <year>2019</year>
          <access-date>2019-02-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.optum.com/solutions/data-analytics/data.html">https://www.optum.com/solutions/data-analytics/data.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <article-title>Truven Health Analytics</article-title>
          <source>IBM</source>
          <year>2019</year>
          <access-date>2019-02-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://truvenhealth.com/Portals/0/assets/ACRS_11223_0912_MarketScanResearch_SS_Web.pdf">https://truvenhealth.com/Portals/0/assets/ACRS_11223_0912_MarketScanResearch_SS_Web.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wade</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Implementing SNOMED CT for quality reporting: avoiding pitfalls</article-title>
          <source>Appl Clin Inform</source>
          <year>2011</year>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>534</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23616894"/>
          </comment>
          <pub-id pub-id-type="doi">10.4338/ACI-2011-10-RA-0056</pub-id>
          <pub-id pub-id-type="medline">23616894</pub-id>
          <pub-id pub-id-type="pmcid">PMC3613001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bandrowski</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brinkman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Brochhausen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brush</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Bug</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chibucos</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Clancy</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Courtot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Derom</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dumontier</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fostel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fragoso</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Beltran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Haendel</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Heiskanen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lister</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Lord</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Malone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Manduchi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McGee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Morrison</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Overton</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Parkinson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rocca-Serra</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ruttenberg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sansone</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scheuermann</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Schober</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Soldatova</surname>
              <given-names>LN</given-names>
            </name>
            <name name-style="western">
              <surname>Stoeckert</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>CF</given-names>
            </name>
            <name name-style="western">
              <surname>Torniai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Vita</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Whetzel</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The ontology for biomedical investigations</article-title>
          <source>PLoS One</source>
          <year>2016</year>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>e0154556</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0154556"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0154556</pub-id>
          <pub-id pub-id-type="medline">27128319</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-55757</pub-id>
          <pub-id pub-id-type="pmcid">PMC4851331</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Paley</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>James</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gompertz</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wolfe</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Hemingway</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rudd</surname>
              <given-names>AG</given-names>
            </name>
          </person-group>
          <article-title>Socioeconomic disparities in first stroke incidence, quality of care, and survival: a nationwide registry-based cohort study of 44 million adults in England</article-title>
          <source>Lancet Public Health</source>
          <year>2018</year>
          <month>04</month>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>185</fpage>
          <lpage>93</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2468-2667(18)30030-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2468-2667(18)30030-6</pub-id>
          <pub-id pub-id-type="medline">29550372</pub-id>
          <pub-id pub-id-type="pii">S2468-2667(18)30030-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC5887080</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Freedman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Potpara</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Lip</surname>
              <given-names>GY</given-names>
            </name>
          </person-group>
          <article-title>Stroke prevention in atrial fibrillation</article-title>
          <source>Lancet</source>
          <year>2016</year>
          <month>08</month>
          <day>20</day>
          <volume>388</volume>
          <issue>10046</issue>
          <fpage>806</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(16)31257-0</pub-id>
          <pub-id pub-id-type="medline">27560276</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(16)31257-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verhoef</surname>
              <given-names>TI</given-names>
            </name>
            <name name-style="western">
              <surname>Redekop</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>Darba</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Geitona</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Siebert</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>de Boer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Maitland-van der Zee</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barallon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Briz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Daly</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Haschke-Becher</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kamali</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kirchheiner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Manolopoulos</surname>
              <given-names>VG</given-names>
            </name>
            <name name-style="western">
              <surname>Pirmohamed</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rosendaal</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>van Schie</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Wadelius</surname>
              <given-names>M</given-names>
            </name>
            <collab>EU-PACT Group</collab>
          </person-group>
          <article-title>A systematic review of cost-effectiveness analyses of pharmacogenetic-guided dosing in treatment with coumarin derivatives</article-title>
          <source>Pharmacogenomics</source>
          <year>2010</year>
          <month>07</month>
          <volume>11</volume>
          <issue>7</issue>
          <fpage>989</fpage>
          <lpage>1002</lpage>
          <pub-id pub-id-type="doi">10.2217/pgs.10.74</pub-id>
          <pub-id pub-id-type="medline">20602617</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Sangaralingham</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Bellolio</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>McBane</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Noseworthy</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Effectiveness and safety of dabigatran, rivaroxaban, and apixaban versus warfarin in nonvalvular atrial fibrillation</article-title>
          <source>J Am Heart Assoc</source>
          <year>2016</year>
          <month>06</month>
          <day>13</day>
          <volume>5</volume>
          <issue>6</issue>
          <fpage>e003725</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ahajournals.org/doi/10.1161/JAHA.116.003725?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/JAHA.116.003725</pub-id>
          <pub-id pub-id-type="medline">27412905</pub-id>
          <pub-id pub-id-type="pii">JAHA.116.003725</pub-id>
          <pub-id pub-id-type="pmcid">PMC4937291</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Desai</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Hyde</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Kabadi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>St Louis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bonato</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Loomis</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Galaznik</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Utilization of positive and negative controls to examine comorbid associations in observational database studies</article-title>
          <source>Med Care</source>
          <year>2017</year>
          <month>03</month>
          <volume>55</volume>
          <issue>3</issue>
          <fpage>244</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27787351"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MLR.0000000000000640</pub-id>
          <pub-id pub-id-type="medline">27787351</pub-id>
          <pub-id pub-id-type="pmcid">PMC5318155</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McWilliam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lutter</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nardinelli</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Healthcare impact of personalized medicine using genetic testing: an exploratory analysis for warfarin</article-title>
          <source>Per Med</source>
          <year>2008</year>
          <month>05</month>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>279</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.2217/17410541.5.3.279</pub-id>
          <pub-id pub-id-type="medline">29783488</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gilligan</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Gandhi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Henriques</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sander</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>All-cause, stroke-, and bleed-specific healthcare costs: comparison among patients with Non-Valvular Atrial Fibrillation (NVAF) newly treated with dabigatran or warfarin</article-title>
          <source>Am J Cardiovasc Drugs</source>
          <year>2017</year>
          <month>12</month>
          <volume>17</volume>
          <issue>6</issue>
          <fpage>481</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28795348"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40256-017-0244-1</pub-id>
          <pub-id pub-id-type="medline">28795348</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40256-017-0244-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5701952</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
