<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v24i8e40384</article-id>
      <article-id pub-id-type="pmid">36040790</article-id>
      <article-id pub-id-type="doi">10.2196/40384</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Development and Evaluation of a Natural Language Processing Annotation Tool to Facilitate Phenotyping of Cognitive Status in Electronic Health Records: Diagnostic Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Basch</surname>
            <given-names>Corey</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Walsh</surname>
            <given-names>Joseph</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hopper</surname>
            <given-names>Louise</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Noori</surname>
            <given-names>Ayush</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1420-1236</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Magdamo</surname>
            <given-names>Colin</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8965-4630</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Xiao</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5885-3466</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Tyagi</surname>
            <given-names>Tanish</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9792-3025</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Zhaozhi</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5957-4509</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Kondepudi</surname>
            <given-names>Akhil</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2643-7580</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Alabsi</surname>
            <given-names>Haitham</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6354-4679</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Rudmann</surname>
            <given-names>Emily</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6443-990X</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Wilcox</surname>
            <given-names>Douglas</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6840-7948</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Brenner</surname>
            <given-names>Laura</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6752-0471</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Robbins</surname>
            <given-names>Gregory K</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7545-5817</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Moura</surname>
            <given-names>Lidia</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1191-1315</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Zafar</surname>
            <given-names>Sahar</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5252-5376</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author">
          <name name-style="western">
            <surname>Benson</surname>
            <given-names>Nicole M</given-names>
          </name>
          <degrees>MD, MBI</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5361-4311</ext-link>
        </contrib>
        <contrib id="contrib15" contrib-type="author">
          <name name-style="western">
            <surname>Hsu</surname>
            <given-names>John</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8244-231X</ext-link>
        </contrib>
        <contrib id="contrib16" contrib-type="author">
          <name name-style="western">
            <surname>Dickson</surname>
            <given-names>John R</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0135-7928</ext-link>
        </contrib>
        <contrib id="contrib17" contrib-type="author">
          <name name-style="western">
            <surname>Serrano-Pozo</surname>
            <given-names>Alberto</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0899-7530</ext-link>
        </contrib>
        <contrib id="contrib18" contrib-type="author">
          <name name-style="western">
            <surname>Hyman</surname>
            <given-names>Bradley T</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7959-9401</ext-link>
        </contrib>
        <contrib id="contrib19" contrib-type="author">
          <name name-style="western">
            <surname>Blacker</surname>
            <given-names>Deborah</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff8" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6107-7376</ext-link>
        </contrib>
        <contrib id="contrib20" contrib-type="author">
          <name name-style="western">
            <surname>Westover</surname>
            <given-names>M Brandon</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4803-312X</ext-link>
        </contrib>
        <contrib id="contrib21" contrib-type="author">
          <name name-style="western">
            <surname>Mukerji</surname>
            <given-names>Shibani S</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5677-6954</ext-link>
        </contrib>
        <contrib id="contrib22" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Das</surname>
            <given-names>Sudeshna</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Neurology</institution>
            <institution>Massachusetts General Hospital</institution>
            <addr-line>55 Fruit Street</addr-line>
            <addr-line>Boston, MA, 02114</addr-line>
            <country>United States</country>
            <phone>1 617 726 2000</phone>
            <email>SDAS5@mgh.harvard.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9486-6811</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Neurology</institution>
        <institution>Massachusetts General Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Harvard Medical School</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Vaccine and Immunotherapy Center</institution>
        <institution>Division of Infectious Disease</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Division of Pulmonary and Critical Care Medicine</institution>
        <institution>Massachusetts General Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Division of Infectious Diseases</institution>
        <institution>Massachusetts General Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Mongan Institute</institution>
        <institution>Massachusetts General Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>McLean Hospital</institution>
        <addr-line>Belmont, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Department of Psychiatry</institution>
        <institution>Massachusetts General Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Sudeshna Das <email>SDAS5@mgh.harvard.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>30</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>24</volume>
      <issue>8</issue>
      <elocation-id>e40384</elocation-id>
      <history>
        <date date-type="received">
          <day>17</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>31</day>
          <month>7</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Ayush Noori, Colin Magdamo, Xiao Liu, Tanish Tyagi, Zhaozhi Li, Akhil Kondepudi, Haitham Alabsi, Emily Rudmann, Douglas Wilcox, Laura Brenner, Gregory K Robbins, Lidia Moura, Sahar Zafar, Nicole M Benson, John Hsu, John R Dickson, Alberto Serrano-Pozo, Bradley T Hyman, Deborah Blacker, M Brandon Westover, Shibani S Mukerji, Sudeshna Das. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 30.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2022/8/e40384" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Electronic health records (EHRs) with large sample sizes and rich information offer great potential for dementia research, but current methods of phenotyping cognitive status are not scalable.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to evaluate whether natural language processing (NLP)–powered semiautomated annotation can improve the speed and interrater reliability of chart reviews for phenotyping cognitive status.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>In this diagnostic study, we developed and evaluated a semiautomated NLP-powered annotation tool (NAT) to facilitate phenotyping of cognitive status. Clinical experts adjudicated the cognitive status of 627 patients at Mass General Brigham (MGB) health care, using NAT or traditional chart reviews. Patient charts contained EHR data from two data sets: (1) records from January 1, 2017, to December 31, 2018, for 100 Medicare beneficiaries from the MGB Accountable Care Organization and (2) records from 2 years prior to COVID-19 diagnosis to the date of COVID-19 diagnosis for 527 MGB patients. All EHR data from the relevant period were extracted; diagnosis codes, medications, and laboratory test values were processed and summarized; clinical notes were processed through an NLP pipeline; and a web tool was developed to present an integrated view of all data. Cognitive status was rated as cognitively normal, cognitively impaired, or undetermined. Assessment time and interrater agreement of NAT compared to manual chart reviews for cognitive status phenotyping were evaluated.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>NAT adjudication provided higher interrater agreement (Cohen κ=0.89 vs κ=0.80) and significant speed up (time difference mean 1.4, SD 1.3 minutes; <italic>P</italic>&#60;.001; ratio median 2.2, min-max 0.4-20) over manual chart reviews. There was moderate agreement with manual chart reviews (Cohen κ=0.67). In the cases that exhibited disagreement with manual chart reviews, NAT adjudication was able to produce assessments that had broader clinical consensus due to its integrated view of highlighted relevant information and semiautomated NLP features.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>NAT adjudication improves the speed and interrater reliability for phenotyping cognitive status compared to manual chart reviews. This study underscores the potential of an NLP-based clinically adjudicated method to build large-scale dementia research cohorts from EHRs.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>chart review</kwd>
        <kwd>cognition</kwd>
        <kwd>cognitive status</kwd>
        <kwd>dementia</kwd>
        <kwd>diagnostic</kwd>
        <kwd>electronic health record</kwd>
        <kwd>health care</kwd>
        <kwd>natural language processing</kwd>
        <kwd>research cohort</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>In recent years, electronic health records (EHRs) have become increasingly common in US health care facilities; they provide a wealth of information on patient demographics, medical history, clinical data, and health system interactions. EHRs offer an unprecedented opportunity to improve clinical care and examine a broad variety of scientific, health care utilization, and health policy questions [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. An important first step in conducting EHR research is accurately identifying patients with a certain health condition, event, or disease, which is known as phenotyping [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. The identified patient sample is subsequently leveraged for a wide range of purposes, such as providing clinical decision support for health care delivery [<xref ref-type="bibr" rid="ref5">5</xref>], conducting epidemiological research [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], and practicing precision medicine [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>Phenotyping cognitive status (ie, distinguishing between normal cognition and any stage of cognitive impairment) in EHR is a major challenge since dementia is underrecognized, underdiagnosed, and underreported in claims data [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref12">12</xref>], leading to inaccurate identification of dementia cases in many studies based on claims or EHR data [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. Informative missingness, errors, and biases in EHR may further exacerbate the challenges of defining dementia outcomes [<xref ref-type="bibr" rid="ref16">16</xref>]. Yet another challenge of phenotyping arises from complex, subjective, loosely-defined diagnostic criteria as well as the format—that is, structured (eg, diagnosis codes and medications) versus unstructured (eg, clinical notes and images)—in which the information is stored [<xref ref-type="bibr" rid="ref4">4</xref>]. Previous studies have demonstrated that information on cognitive status is often found only in free text [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. Clinicians may chart symptoms of cognitive problems in clinical notes but may not make a formal diagnosis, refer to a specialist, or prescribe medication for multiple reasons including clinical role, lack of time or expertise, patient resistance, or limited treatment options [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. Thus, accurately phenotyping cognitive status requires the combined use of both structured data, such as diagnosis codes, medications, and laboratory test results, as well as unstructured clinical notes.</p>
      <p>Several algorithms have been developed for phenotyping cognitive status; some studies used structured data, such as diagnosis codes, missed appointments, or health care utilization patterns [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref23">23</xref>], whereas others have applied natural language processing (NLP) to unstructured notes [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. None of these prior efforts combined both structured and unstructured input modalities, and manual annotation by clinical experts is limited by the lack of available tools to facilitate efficient chart review [<xref ref-type="bibr" rid="ref25">25</xref>]. Thus, we hypothesized that the best approach for phenotyping cognitive status is a semiautomated one in which automated NLP is applied to clinical notes and presented in an integrated view to the clinical expert for final manual adjudication of cognitive status.</p>
      <p>We developed NAT, a semiautomated NLP-powered annotation tool, to facilitate adjudication of cognitive status. The tool extracts and processes data from EHRs and then ranks clinical notes based on a deep learning NLP algorithm (Macro <italic>F</italic><sub>1</sub>=0.92) that classifies whether a note indicates normal cognition, cognitive impairment, or has no pertinent information [<xref ref-type="bibr" rid="ref26">26</xref>]. It highlights key information and presents a summarized view to the annotator. We evaluated NAT in two EHR data sets: (1) Medicare beneficiaries from the Mass General Brigham (MGB) Accountable Care Organization (ACO) who were labeled in another study using manual chart reviews [<xref ref-type="bibr" rid="ref15">15</xref>] and (2) MGB patients with laboratory confirmed SARS-CoV-2 (a case-control study to investigate the effects of COVID-19 on people with and without HIV was used as an exemplar of a research cohort that requires labeling of cognitive status). We evaluated interrater agreement in the first data set and compared it to interrater agreement in Epic—the EHR system used at MGB since 2015. The second data set was used to compare timings of manual to NAT adjudication, as the timing of manual adjudication was not available in the first data set.</p>
      <p>By addressing the gaps in current chart review methods and leveraging existing NLP methods, we demonstrate that NAT increases both the efficiency and the interrater reliability of phenotyping cognitive status in EHR (relative to manual chart reviews) to build future research cohorts.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Clinical Settings and Data Sources</title>
        <p>This diagnostic study was conducted at MGB—formerly Partners Healthcare—a private nonprofit organization comprising two major academic hospitals, community hospitals, and community health centers in the Boston metropolitan area. Data were sourced from the MGB Enterprise Data Warehouse that stores data from Epic. We evaluated NAT adjudication for phenotyping cognitive status on two distinct data sets. The first one included EHR data from January 1, 2017, to December 31, 2018, of 100 patients randomly selected from a larger data set that was expert-annotated via manual Epic chart reviews and reported elsewhere [<xref ref-type="bibr" rid="ref15">15</xref>]. Specifically, this manually expert-annotated data set contained 1002 Medicare beneficiaries from the MGB ACO who were classified into (1) normal cognition, (2) borderline of normal cognition and mild cognitive impairment (MCI), (3) MCI, (4) borderline of MCI and dementia, or (5) dementia [<xref ref-type="bibr" rid="ref15">15</xref>]. The experts graded their confidence in the adjudication as low, medium, moderate, or high. The 100 patients were randomly sampled from these 5 classes with 20 from each class, ensuring that each class had a similar distribution of confidence scores. The second data set included 527 MGB patients with a laboratory confirmed SARS-CoV-2 infection based on polymerase chain reaction testing between March 1 and December 31, 2020. The data set was created for a case-control study to investigate the effects of COVID-19 on people with and without HIV; EHR data up to 2 years prior to and any time after the index positive polymerase chain reaction test were used to investigate the performance of NAT adjudication.</p>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>This study was approved by the MGB Institutional Review Board (2015P001915).</p>
      </sec>
      <sec>
        <title>Definition of Cognitive Impairment</title>
        <p>In this study, to phenotype cognitive status, patients were annotated with three labels: (1) cognitively normal (CN), (2) cognitively impaired (CI), and (3) undetermined. Patients were labeled as CI if there was any documented suspicion or concern of memory or cognitive decline, whether based on symptoms, observations, or objective testing. This ranged from any dementia-related International Classification of Diseases (ICD) codes or medicines in the patients’ charts to cognitive concerns—relayed by patients, family members or friends, or providers in the notes and phone logs—as these concerns often reflect an underlying change in cognition even if a cognitive evaluation is normal (in which case they prompt a diagnosis of subjective cognitive decline [<xref ref-type="bibr" rid="ref27">27</xref>]). Conversely, to be annotated as CN, at least implicit evidence of no cognitive concerns was required (eg, the patient continued to work, clearly managed their own care or hobbies, and followed complicated instructions, or they had annual wellness or specialist notes with multisystem assessment and no mention of a cognitive concern). The strongest evidence for a CN annotation was a cognition test performed with an explicit note of intact cognition. If there was conflicting evidence of both cognitive impairment and evidence of no cognitive impairment in a patient’s chart, the latest evidence or specialist notes (if any were available) informed the adjudication. Finally, patients were marked as “undetermined” if the EHR did not have sufficient information.</p>
      </sec>
      <sec>
        <title>Data Preparation</title>
        <p>Data query, preparation, and preprocessing steps are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. For each patient, the following EHR data from the relevant time period were extracted from the Enterprise Data Warehouse: (1) patient demographic information, including name, medical record number, birth date, sex, ethnic group, marital status, and educational level; (2) all clinical notes, including reason for visit, history, note text, encounter type, and MGB provider (including provider department, specialty, and qualifications); (3) current primary care provider; (4) patient care coordination note; (5) medication history and current medications; (6) magnetic resonance imaging and computerized tomography orders; (7) laboratory orders and results; (8) problem list, including ICD diagnoses and diagnosis codes; and (9) visit cancellations.</p>
        <p>Several features were engineered from the EHR to facilitate assessment of cognitive status. Dementia-related medications and ICD codes (medications: galantamine, donepezil, rivastigmine, and memantine; ICD-9 codes: 290.X, 294.X, 331.X, and 780.93; ICD-10 codes: G30.X and G31.X) and laboratory tests (eg, vitamin B12, folate, and thyroid-stimulating hormone) related to assessment of cognitive status were identified and highlighted. The numbers of cancellations, no-shows, and refill requests, relative to the total number of encounters, were computed.</p>
        <p>Finally, NLP was applied to the clinical notes. We curated two lists of regular expressions or keywords related to the presence or absence of both (1) cognitive impairment and (2) the functional impairment of activities of daily living (ADLs) or independent ADLs, respectively (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref> and <xref ref-type="supplementary-material" rid="app3">3</xref>). We identified regular expression matches and highlighted these within the text of the notes with different colors for each category (eg, cognition vs ADLs) to facilitate their identification by the clinician. We applied a previously developed NLP model [<xref ref-type="bibr" rid="ref26">26</xref>] to generate classification probabilities of the following classes for each note: CI, no CI, or neither. The notes were ranked based on these classification probabilities, and notes that the model predicted as indicative of CI were displayed at the top.</p>
      </sec>
      <sec>
        <title>Development of an Annotation Tool</title>
        <p>We designed and developed a web-based chart review and annotation tool, using the Python-based open-source Django web development framework with a SQLite database. We established data models for patient-level demographic and clinical data, encounter-level clinician notes, user account creation and authentication, and patient assignment to individual or multiple annotators (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). We created several user interfaces (ie, pages) to present the various data modalities in an integrated fashion for annotation.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>We evaluated NAT adjudication using three metrics: agreement with manual Epic chart reviews, assessment time, and interrater agreement. We evaluated agreement between manual Epic chart reviews and NAT adjudication as well as interrater agreement for NAT adjudication using Cohen κ, whereas assessment time in minutes was compared using a paired samples Wilcoxon test (also known as the Wilcoxon signed-rank test). There were no missing data for these variables. All analyses were conducted using the R statistical software (version 4.1.2; R Core Team).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Patient Characteristics</title>
        <p>The patient characteristics of the two data sets are shown in <xref ref-type="table" rid="table1">Table 1</xref>. The ACO data set comprised 100 patients (63/100, 63.0% were women; mean age 78.8, SD 7.4 years; 7/100, 7.0% racial or ethnic minorities, 1 missing; 51/100, 51.0% with a college degree or more, 3 missing; and 50/100, 50.0% were married). The COVID-19 data set comprised 527 patients (226/527, 42.9% women; mean age 52.6, SD 15.0 years; 318/527, 60.3% racial or ethnic minorities, 21 missing; 160/527, 30.4% college education or more, 62 missing; and 195/527, 37.0% married, 16 missing).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Characteristics of Accountable Care Organization (ACO) and COVID-19 data sets used for NLP<sup>a</sup> annotation tool (NAT) evaluation.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="330"/>
            <col width="0"/>
            <col width="320"/>
            <col width="0"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristics</td>
                <td colspan="3">Patients (N=627)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">ACO data set (n=100)</td>
                <td>COVID-19 data set (n=527)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Sex, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td colspan="2">37 (37)</td>
                <td colspan="2">301 (57.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td colspan="2">63 (63)</td>
                <td colspan="2">226 (42.9)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Age (years), mean (SD)</td>
                <td colspan="2">78.8 (7.4)</td>
                <td>52.6 (15)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Minorities, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black</td>
                <td colspan="2">4 (4)</td>
                <td colspan="2">163 (30.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic</td>
                <td colspan="2">2 (2)</td>
                <td colspan="2">138 (26.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian</td>
                <td colspan="2">1 (1)</td>
                <td colspan="2">16 (3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Indigenous</td>
                <td colspan="2">0 (0)</td>
                <td colspan="2">1 (0.2)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">College education, n (%)</td>
                <td colspan="2">51 (51)</td>
                <td>160 (30.4)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Married, n (%)</td>
                <td colspan="2">50 (50)</td>
                <td>195 (37)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Clinical characteristics</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Number of encounters, median (min-max)</td>
                <td colspan="2">164 (8-858)</td>
                <td colspan="2">106 (1-2474)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>PCP<sup>b</sup> visit, n (%)</td>
                <td colspan="2">71 (71)</td>
                <td colspan="2">423 (80.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Dementia ICD<sup>c</sup> code and medication, n (%)</td>
                <td colspan="2">51 (51)</td>
                <td colspan="2">166 (5.3)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>PCP: primary care provider.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>ICD: International Classification of Diseases.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Features of NAT</title>
        <p>Upon logging in to our annotation tool, an authenticated user is presented with a dashboard listing the patient IDs, ages, and sexes of their assigned patients (<xref rid="figure1" ref-type="fig">Figure 1</xref>). In addition, the total number of notes, the sequences within the notes that match a cognition or ADL keyword (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref> and <xref ref-type="supplementary-material" rid="app3">3</xref>), and the number of notes for each predicted class (ie, cognition and ADL) are also presented. After annotation, the patient’s label (CN, CI, or undetermined) is displayed with background colors reflecting the patient’s annotated cognitive status.</p>
        <p>Selecting a patient navigates the user to an annotation view summarizing the patient’s demographic and clinical information (<xref rid="figure2" ref-type="fig">Figure 2</xref>A). Engineered features, including the total number of notes, encounters, no shows, cancellations, and refill requests, along with the patient care coordination note (if any), diagnosis ICD codes, and medications, are displayed (<xref rid="figure2" ref-type="fig">Figure 2</xref>B). Brain imaging and relevant laboratory tests, such as thyroid-stimulating hormone or vitamin B12, allow annotators to consider systemic causes of cognitive changes (<xref rid="figure2" ref-type="fig">Figure 2</xref>C). Finally, notes sorted by the predicted probability and with highlighted keywords are presented to expedite the review of the entire chart history during the relevant period for the clinical adjudication of cognitive status. Examples of the three predicted classes of notes (CN, CI, and undetermined) are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>D.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>NAT dashboard: screenshot of the NAT dashboard displaying the current workload and assigned patients. A summary of patient information is displayed in each row, and the background reflects the cognitive status assigned to the patient. NAT: NLP annotation tool; NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e40384_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Annotation view: (A) patient view displaying summary information at the top and sequences from clinical notes at the bottom; (B) the Patient Information box summarizes health care interaction, patient care coordination notes, current medications, and diagnosis codes; (C) laboratory tests and imaging conducted on the patient; (D) sample sequences from notes with dementia and activities of daily living (ADLs) keywords highlighted. Each sequence is classified as cognitive impairment (CI), no CI, or neither, with a probability, and allows annotators to flag incorrect classifications.</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e40384_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Evaluation of NAT</title>
        <p>Two teams of expert clinicians were randomly assigned patients and adjudicated the ACO data set, using NAT (team 1: LB, GKR, SSM; team 2: MBW and HA). We compared the phenotyping of cognitive status using NAT to manual chart reviews using Epic (labels were obtained from Moura et al [<xref ref-type="bibr" rid="ref15">15</xref>]; patients who were not CN were grouped into the CI class). We removed patients annotated as “undetermined” in the set adjudicated using NAT, as they had little information in EHR to assess cognitive status and could not be directly compared to the labels obtained from Moura et al [<xref ref-type="bibr" rid="ref15">15</xref>]. The agreement between NAT and manual Epic chart reviews was moderate for both team 1 (Cohen κ=0.68) and team 2 (Cohen κ=0.65) with a mean Cohen κ=0.67; the breakdown is shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>A. Surprisingly, patients whose NAT label disagreed with the manual Epic chart reviews were annotated as CI using Epic and as CN using NAT. We manually reviewed the patients where the diagnostic labels disagreed; we found that NAT was able to highlight certain passages of text, such as “language, attention, and memory function are intact with good fund of knowledge”; the highlighted text facilitated the labeling of the patient as CN, whereas such phrases were easily missed in manual chart reviews. Moreover, if a patient had a transient cognitive deficit and was later evaluated as CN, for example, NAT presented all notes with highlighted evidence along with their dates in one view, making it easier to follow the sequence of events. The disagreements were mostly among patients annotated with a low confidence score in the Epic manually annotated data set [<xref ref-type="bibr" rid="ref15">15</xref>] (<xref rid="figure3" ref-type="fig">Figure 3</xref>B). 
The interrater agreement of NAT adjudication between team 1 and team 2 was higher (Cohen κ=0.89) than the interrater agreement (Cohen κ=0.80) with manual Epic chart reviews reported in Moura et al [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>Next, we compared the time required for phenotyping of cognitive status via NAT adjudication versus manual chart reviews in Epic. Four of the authors (DW, ER, HA, and SSM) adjudicated the full COVID-19 data set using NAT and recorded the annotation time for 129 patients. Two of the authors (HA and SSM) timed manual chart reviews in Epic for 32 randomly sampled patients. To ensure that a patient was not adjudicated using both methods by the same person, HA used Epic to perform chart reviews of patients adjudicated by SSM using NAT and vice versa. For most of the patients, the annotation time was substantially shorter with NAT as compared to manual chart reviews in Epic (<xref rid="figure3" ref-type="fig">Figure 3</xref>C). Adjudications using NAT provided substantial speed-up of annotations compared to manual chart reviews in Epic (time difference mean 1.4, SD 1.3 minutes; <italic>P</italic>&lt;.001; ratio median 2.2, min-max 0.4-20). Additionally, we observed that clinicians spent more time using NAT on the first half of patients compared to the second half. This “learning effect” was not observed with manual Epic chart reviews. The breakdown of the cognitive status for the COVID-19 data set is shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. Notably, the cognitive status for 21.1% (n=111) of patients was undetermined, suggesting that there was little information in EHR to determine their cognitive status.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Comparison of adjudication with natural language processing (NLP)–powered annotation tool (NAT) and manual Epic chart reviews: (A) contingency table displaying adjudication with NAT versus Epic by team 1 (top row) and team 2 (bottom row); (B) distribution of confidence scores assigned in Epic manual chart reviews (Moura et al [<xref ref-type="bibr" rid="ref15">15</xref>]) for agreements and disagreements between the two methods; (C) annotation time comparisons between NAT versus Epic.</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e40384_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>COVID-19 data set cognitive scores: distribution of cognitive scores in the COVID-19 data set.</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e40384_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we developed and evaluated a novel semiautomated NLP-powered annotation tool, NAT, to facilitate phenotyping of cognitive status. Clinical experts adjudicated the cognitive status of 627 patients at MGB health care using NAT or traditional chart reviews. NAT improves the efficiency and interrater reliability of chart review as compared to manual adjudication.</p>
      </sec>
      <sec>
        <title>Strengths</title>
        <p>Phenotyping methods have been applied to EHR to successfully identify patients with autism [<xref ref-type="bibr" rid="ref28">28</xref>], diabetes [<xref ref-type="bibr" rid="ref29">29</xref>], immunological diseases [<xref ref-type="bibr" rid="ref30">30</xref>], and several chronic diseases [<xref ref-type="bibr" rid="ref16">16</xref>]. EHR has been extensively used for dementia research, but the outcomes are typically defined by diagnosis codes or specialist diagnoses. Although phenotyping tools using NLP have been developed to detect cognitive impairment [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref24">24</xref>], they have been limited by their performance. In this study, we propose a novel semiautomated approach that combines NLP outputs with manual adjudication.</p>
        <p>We selected this approach as it combines the automation of an NLP tool and the expert review required for phenotyping cognitive status. Phenotyping cognitive status requires the input from both structured (eg, diagnosis codes and medications) and unstructured (eg, clinical notes and images) data, and currently, there are no machine learning tools that integrate multiple data modalities. The approach has several advantages over manual chart reviews. Cognitive concerns are often subjective, and a significant amount of information is required to confidently ascertain the correct diagnosis. Since diagnoses are staged across months or years, individual notes across time must be evaluated together—NAT filters data for the period of interest and thus facilitates the adjudication process. Next, the absence of cognitive deficits is often difficult to adjudicate with confidence. In these cases, the annotator needs to review all notes to ensure there were no signs of cognitive impairment. NAT improves the efficiency of such tasks, as it automatically flags notes with signs of cognitive impairment as well as those with information on normal cognition and ranks them in order of importance. In addition, clinicians often use a wide variety of terms and phrases in clinical notes that can easily be missed in manual reviews. NAT, on the other hand, highlights all cognition-related patterns and phrases, decreasing the likelihood that the annotator might miss any information relevant to the decision-making task. Finally, NAT streamlines an established adjudication protocol and thus improves interrater agreement. NAT can, in principle, be extended to local hospitals and clinics that have digitized data but not an EHR system.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has several limitations. First, NAT does not link to brain images, which may contain information relevant to brain function. Second, although NAT improves the efficiency of adjudicating cognitive status compared to manual chart reviews, it is not scalable to large data sets of thousands of patients. To scale to such sample sizes, fully automated machine learning algorithms that replicate the adjudication process are required. In the future, we plan to use NAT to create gold-standard data sets for training and validation of such machine learning algorithms for phenotyping cognitive status. Third, NAT adjudication was evaluated on data from a single health care system. Whether the cognition and ADL-related keywords apply to other health care settings is yet to be confirmed. The performance of the NLP tool [<xref ref-type="bibr" rid="ref26">26</xref>] also needs to be evaluated with external data. Fourth, adjudicators were not blinded to identifiable information in EHR, which may have introduced biases in their labels. Tools, such as Philter, could be used in the future to remove protected health information in NAT [<xref ref-type="bibr" rid="ref31">31</xref>]. Finally, research studies using EHR-based data sets are limited by the information available within the health care system and miss records of care outside the system. Such patients with missing information were labeled as “undetermined” in this study, but studies that use diagnosis codes for phenotyping of cognitive status may incorrectly label such patients as CN instead of distinguishing them as patients with insufficient information. Our study highlights the issue of missing information when phenotyping cognitive status in EHR, and consequently, the need for future work to minimize biases if such patients are excluded in a research study.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Although there is no substitute for a longitudinal cohort with formal cognitive evaluations to study Alzheimer disease and related dementias, leveraging EHR data with NLP holds promise. In this diagnostic study, we developed and evaluated a semiautomated NLP-powered annotation tool, NAT, to facilitate the phenotyping of cognitive status in EHRs. Expert clinicians adjudicated cognitive status of 627 patients from two distinct data sets; NAT had a high interrater agreement and improved the speed of annotations compared to manual chart reviews. Using NAT to adjudicate cognitive status would likely increase the feasibility and scalability of building gold-standard data sets for machine learning algorithms and research cohorts to study cognitive decline.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Data query, preparation, and preprocessing steps.</p>
        <media xlink:href="jmir_v24i8e40384_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 253 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Regular expressions of dementia-related keywords.</p>
        <media xlink:href="jmir_v24i8e40384_app2.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 7 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Regular expressions of activities of daily living (ADLs) keywords.</p>
        <media xlink:href="jmir_v24i8e40384_app3.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 6 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Data model.</p>
        <media xlink:href="jmir_v24i8e40384_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 650 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACO</term>
          <def>
            <p>Accountable Care Organization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ADL</term>
          <def>
            <p>activities of daily living</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CI</term>
          <def>
            <p>cognitively impaired</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CN</term>
          <def>
            <p>cognitively normal</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MCI</term>
          <def>
            <p>mild cognitive impairment</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MGB</term>
          <def>
            <p>Mass General Brigham</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NAT</term>
          <def>
            <p>NLP annotation tool</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by funding from National Institute on Aging awards (K08AG053380, R01AG073410, and P30AG062421), a National Institute of Mental Health award (K23MH115812), the James S McDonnell Foundation, and the Rappaport Fellowship. The funding organizations had no role in the design and conduct of the study; collection, management, analysis, and interpretation of the data; preparation, review, or approval of the manuscript; and decision to submit the manuscript for publication.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>ASP, BTH, DB, MBW, SSM, and SD conceptualized and designed the study. Acquisition, analysis, or interpretation of data was conducted by AN, CM, XL, TT, AK, HA, ER, DW, LB, GKR, LM, SZ, NMB, JH, and JD. The manuscript was drafted by AN, CM, and SD. All authors contributed to critical revision of the manuscript. Statistical analysis was performed by AN, CM, and XL. Funding was obtained by BTH, SSM, and SD. The study was supervised by SSM and SD.</p>
      </fn>
      <fn fn-type="conflict">
        <p>SD, BTH, and ASP report research funding from Abbvie Inc. JD serves on a scientific review board for I-Mab Biopharma. NMB volunteers for the Epic Behavioral Health Subspecialty Steering Board.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Schulam</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Beam</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>IY</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganath</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A review of challenges and opportunities in machine learning for health</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2020</year>
          <volume>2020</volume>
          <fpage>191</fpage>
          <lpage>200</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32477638"/>
          </comment>
          <pub-id pub-id-type="medline">32477638</pub-id>
          <pub-id pub-id-type="pmcid">PMC7233077</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sendak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nichols</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Balu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Machine learning in health care: a critical appraisal of challenges and opportunities</article-title>
          <source>EGEMS (Wash DC)</source>
          <year>2019</year>
          <month>01</month>
          <day>24</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30705919"/>
          </comment>
          <pub-id pub-id-type="doi">10.5334/egems.287</pub-id>
          <pub-id pub-id-type="medline">30705919</pub-id>
          <pub-id pub-id-type="pmcid">PMC6354017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <month>06</month>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
          <pub-id pub-id-type="medline">22549152</pub-id>
          <pub-id pub-id-type="pii">nrg3208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Horwitz</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Toh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using machine learning to identify health outcomes from electronic health record data</article-title>
          <source>Curr Epidemiol Rep</source>
          <year>2018</year>
          <month>12</month>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>331</fpage>
          <lpage>342</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30555773"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40471-018-0165-9</pub-id>
          <pub-id pub-id-type="medline">30555773</pub-id>
          <pub-id pub-id-type="pmcid">PMC6289196</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Pincock</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Baumgart</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Sadowski</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Fedorak</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Kroeker</surname>
              <given-names>KI</given-names>
            </name>
          </person-group>
          <article-title>An overview of clinical decision support systems: benefits, risks, and strategies for success</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <volume>3</volume>
          <fpage>17</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32047862"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0221-y</pub-id>
          <pub-id pub-id-type="medline">32047862</pub-id>
          <pub-id pub-id-type="pii">221</pub-id>
          <pub-id pub-id-type="pmcid">PMC7005290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ananthakrishnan</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Castro</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Link</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Honerlaw</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gagnon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Plenge</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>O'Donnell</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Gaziano</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>KP</given-names>
            </name>
          </person-group>
          <article-title>High-throughput phenotyping with electronic medical record data using a common semi-supervised approach (PheCAP)</article-title>
          <source>Nat Protoc</source>
          <year>2019</year>
          <month>12</month>
          <volume>14</volume>
          <issue>12</issue>
          <fpage>3426</fpage>
          <lpage>3444</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31748751"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41596-019-0227-6</pub-id>
          <pub-id pub-id-type="medline">31748751</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41596-019-0227-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7323894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wilkinson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Arnold</surname>
              <given-names>KF</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>van Smeden</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sippy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>de Kamps</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Konigorski</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lippert</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilthorpe</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Tennant</surname>
              <given-names>PWG</given-names>
            </name>
          </person-group>
          <article-title>Time to reality check the promises of machine learning-powered precision medicine</article-title>
          <source>Lancet Digit Health</source>
          <year>2020</year>
          <month>12</month>
          <volume>2</volume>
          <issue>12</issue>
          <fpage>e677</fpage>
          <lpage>e680</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(20)30200-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(20)30200-4</pub-id>
          <pub-id pub-id-type="medline">33328030</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(20)30200-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC9060421</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>Alzheimer’s Association</collab>
          </person-group>
          <article-title>2021 Alzheimer's disease facts and figures</article-title>
          <source>Alzheimers Dement</source>
          <year>2021</year>
          <month>03</month>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>327</fpage>
          <lpage>406</lpage>
          <pub-id pub-id-type="doi">10.1002/alz.12328</pub-id>
          <pub-id pub-id-type="medline">33756057</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amjad</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Sheehan</surname>
              <given-names>OC</given-names>
            </name>
            <name name-style="western">
              <surname>Lyketsos</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Wolff</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Samus</surname>
              <given-names>QM</given-names>
            </name>
          </person-group>
          <article-title>Underdiagnosis of dementia: an observational study of patterns in diagnosis and awareness in US older adults</article-title>
          <source>J Gen Intern Med</source>
          <year>2018</year>
          <month>07</month>
          <volume>33</volume>
          <issue>7</issue>
          <fpage>1131</fpage>
          <lpage>1138</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29508259"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-018-4377-y</pub-id>
          <pub-id pub-id-type="medline">29508259</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-018-4377-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC6025653</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bradford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kunik</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Missed and delayed diagnosis of dementia in primary care: prevalence and contributing factors</article-title>
          <source>Alzheimer Dis Assoc Disord</source>
          <year>2009</year>
          <volume>23</volume>
          <issue>4</issue>
          <fpage>306</fpage>
          <lpage>314</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19568149"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/WAD.0b013e3181a6bebc</pub-id>
          <pub-id pub-id-type="medline">19568149</pub-id>
          <pub-id pub-id-type="pmcid">PMC2787842</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>DH</given-names>
              <suffix>Jr</suffix>
            </name>
            <name name-style="western">
              <surname>Østbye</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Langa</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Plassman</surname>
              <given-names>BL</given-names>
            </name>
          </person-group>
          <article-title>The accuracy of Medicare claims as an epidemiological tool: the case of dementia revisited</article-title>
          <source>J Alzheimers Dis</source>
          <year>2009</year>
          <month>07</month>
          <day>23</day>
          <volume>17</volume>
          <issue>4</issue>
          <fpage>807</fpage>
          <lpage>815</lpage>
          <pub-id pub-id-type="doi">10.3233/jad-2009-1099</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kotagal</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Langa</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Plassman</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>GG</given-names>
            </name>
            <name name-style="western">
              <surname>Giordani</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Steffens</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Kabeto</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Albin</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>NL</given-names>
            </name>
          </person-group>
          <article-title>Factors associated with cognitive evaluations in the United States</article-title>
          <source>Neurology</source>
          <year>2014</year>
          <month>11</month>
          <day>26</day>
          <volume>84</volume>
          <issue>1</issue>
          <fpage>64</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1212/wnl.0000000000001096</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Østbye</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Clipp</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Scoyoc</surname>
              <given-names>LV</given-names>
            </name>
            <name name-style="western">
              <surname>Plassman</surname>
              <given-names>BL</given-names>
            </name>
          </person-group>
          <article-title>Identification of dementia: agreement among national survey data, medicare claims, and death certificates</article-title>
          <source>Health Serv Res</source>
          <year>2008</year>
          <month>03</month>
          <volume>43</volume>
          <issue>1 Pt 1</issue>
          <fpage>313</fpage>
          <lpage>326</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18211532"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1475-6773.2007.00748.x</pub-id>
          <pub-id pub-id-type="medline">18211532</pub-id>
          <pub-id pub-id-type="pii">HESR748</pub-id>
          <pub-id pub-id-type="pmcid">PMC2323140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tysinger</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Crimmins</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zissimopoulos</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Analysis of dementia in the US population using Medicare claims: insights from linked survey and administrative claims data</article-title>
          <source>Alzheimers Dement (N Y)</source>
          <year>2019</year>
          <month>06</month>
          <day>06</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>197</fpage>
          <lpage>207</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2352-8737(19)30017-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.trci.2019.04.003</pub-id>
          <pub-id pub-id-type="medline">31198838</pub-id>
          <pub-id pub-id-type="pii">S2352-8737(19)30017-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC6556828</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moura</surname>
              <given-names>LMVR</given-names>
            </name>
            <name name-style="western">
              <surname>Festa</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Volya</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Benson</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Zafar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Blacker</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Normand</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Newhouse</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Identifying Medicare beneficiaries with dementia</article-title>
          <source>J Am Geriatr Soc</source>
          <year>2021</year>
          <month>08</month>
          <day>26</day>
          <volume>69</volume>
          <issue>8</issue>
          <fpage>2240</fpage>
          <lpage>2251</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33901296"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jgs.17183</pub-id>
          <pub-id pub-id-type="medline">33901296</pub-id>
          <pub-id pub-id-type="pmcid">PMC8373730</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhalishahi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rinaldi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>04</month>
          <day>27</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12239</pub-id>
          <pub-id pub-id-type="medline">31066697</pub-id>
          <pub-id pub-id-type="pii">v7i2e12239</pub-id>
          <pub-id pub-id-type="pmcid">PMC6528438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Teixeira</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Mo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cronin</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Warner</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Combining billing codes, clinical notes, and medications from electronic health records provides superior phenotyping performance</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <month>04</month>
          <volume>23</volume>
          <issue>e1</issue>
          <fpage>e20</fpage>
          <lpage>e27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26338219"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv130</pub-id>
          <pub-id pub-id-type="medline">26338219</pub-id>
          <pub-id pub-id-type="pii">ocv130</pub-id>
          <pub-id pub-id-type="pmcid">PMC4954637</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gilmore-Bykovskyi</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Block</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Walljasper</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gleason</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>MN</given-names>
            </name>
          </person-group>
          <article-title>Unstructured clinical documentation reflecting cognitive and behavioral dysfunction: toward an EHR-based phenotype for cognitive impairment</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>09</month>
          <day>01</day>
          <volume>25</volume>
          <issue>9</issue>
          <fpage>1206</fpage>
          <lpage>1212</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29947805"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy070</pub-id>
          <pub-id pub-id-type="medline">29947805</pub-id>
          <pub-id pub-id-type="pii">5045461</pub-id>
          <pub-id pub-id-type="pmcid">PMC6118865</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reuben</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Hackbarth</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Wenger</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>ZS</given-names>
            </name>
            <name name-style="western">
              <surname>Jennings</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>An automated approach to identifying patients with dementia using electronic medical records</article-title>
          <source>J Am Geriatr Soc</source>
          <year>2017</year>
          <month>03</month>
          <volume>65</volume>
          <issue>3</issue>
          <fpage>658</fpage>
          <lpage>659</lpage>
          <pub-id pub-id-type="doi">10.1111/jgs.14744</pub-id>
          <pub-id pub-id-type="medline">28152164</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yarnall</surname>
              <given-names>KSH</given-names>
            </name>
            <name name-style="western">
              <surname>Pollak</surname>
              <given-names>KI</given-names>
            </name>
            <name name-style="western">
              <surname>Østbye</surname>
              <given-names>Truls</given-names>
            </name>
            <name name-style="western">
              <surname>Krause</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Michener</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Primary care: is there enough time for prevention?</article-title>
          <source>Am J Public Health</source>
          <year>2003</year>
          <month>04</month>
          <volume>93</volume>
          <issue>4</issue>
          <fpage>635</fpage>
          <lpage>641</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.93.4.635</pub-id>
          <pub-id pub-id-type="medline">12660210</pub-id>
          <pub-id pub-id-type="pmcid">PMC1447803</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boustani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perkins</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Unverzagt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Austrom</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Fultz</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Hendrie</surname>
              <given-names>HC</given-names>
            </name>
          </person-group>
          <article-title>Who refuses the diagnostic assessment for dementia in primary care?</article-title>
          <source>Int J Geriatr Psychiatry</source>
          <year>2006</year>
          <month>06</month>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>556</fpage>
          <lpage>563</lpage>
          <pub-id pub-id-type="doi">10.1002/gps.1524</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fowler</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Frame</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Perkins</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Watson</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Monahan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Boustani</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Traits of patients who screen positive for dementia and refuse diagnostic assessment</article-title>
          <source>Alzheimers Dement (Amst)</source>
          <year>2015</year>
          <month>06</month>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>236</fpage>
          <lpage>241</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26258162"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.dadm.2015.01.002</pub-id>
          <pub-id pub-id-type="medline">26258162</pub-id>
          <pub-id pub-id-type="pmcid">PMC4527161</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Boscardin</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marcum</surname>
              <given-names>ZA</given-names>
            </name>
            <name name-style="western">
              <surname>Dublin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of eRADAR: a tool using EHR data to detect unrecognized dementia</article-title>
          <source>J Am Geriatr Soc</source>
          <year>2020</year>
          <month>01</month>
          <volume>68</volume>
          <issue>1</issue>
          <fpage>103</fpage>
          <lpage>111</lpage>
          <pub-id pub-id-type="doi">10.1111/jgs.16182</pub-id>
          <pub-id pub-id-type="medline">31612463</pub-id>
          <pub-id pub-id-type="pmcid">PMC7094818</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>O'Horo</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Kashyap</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>RO</given-names>
            </name>
            <name name-style="western">
              <surname>Fryer</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Rabinstein</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Gajic</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Derivation and validation of the automated search algorithms to identify cognitive impairment and dementia in electronic health records</article-title>
          <source>J Crit Care</source>
          <year>2017</year>
          <month>02</month>
          <volume>37</volume>
          <fpage>202</fpage>
          <lpage>205</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jcrc.2016.09.026</pub-id>
          <pub-id pub-id-type="medline">27969571</pub-id>
          <pub-id pub-id-type="pii">S0883-9441(16)30266-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bastian</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Glasziou</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chalmers</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Seventy-five trials and eleven systematic reviews a day: how will we ever keep up?</article-title>
          <source>PLoS Med</source>
          <year>2010</year>
          <month>09</month>
          <day>21</day>
          <volume>7</volume>
          <issue>9</issue>
          <fpage>e1000326</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pmed.1000326"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1000326</pub-id>
          <pub-id pub-id-type="medline">20877712</pub-id>
          <pub-id pub-id-type="pmcid">PMC2943439</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tyagi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Magdamo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Noori</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Deodhar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sheu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Alabasi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Brenner</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zafar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Benson</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Moura</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Serrano-Pozo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Prokopenko</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tanzi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hyman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Blacker</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mukerji</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Westover</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using deep learning to identify patients with cognitive impairment in electronic health records</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on Nov 13, 2021</comment>
          <comment>arXiv:2111.09115 [cs.CL]</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2111.09115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Harten</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Mielke</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Swenson-Dravis</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Hagen</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>RO</given-names>
            </name>
            <name name-style="western">
              <surname>Geda</surname>
              <given-names>YE</given-names>
            </name>
            <name name-style="western">
              <surname>Knopman</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>RC</given-names>
            </name>
          </person-group>
          <article-title>Subjective cognitive decline and risk of MCI</article-title>
          <source>Neurology</source>
          <year>2018</year>
          <month>06</month>
          <day>29</day>
          <volume>91</volume>
          <issue>4</issue>
          <fpage>e300</fpage>
          <lpage>e312</lpage>
          <pub-id pub-id-type="doi">10.1212/wnl.0000000000005863</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leroy</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pettygrove</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Galindo</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Arora</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kurzius-Spencer</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Automated extraction of diagnostic criteria from electronic health records for autism spectrum disorders: development, evaluation, and application</article-title>
          <source>J Med Internet Res</source>
          <year>2018</year>
          <month>11</month>
          <day>07</day>
          <volume>20</volume>
          <issue>11</issue>
          <fpage>e10497</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2018/11/e10497/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10497</pub-id>
          <pub-id pub-id-type="medline">30404767</pub-id>
          <pub-id pub-id-type="pii">v20i11e10497</pub-id>
          <pub-id pub-id-type="pmcid">PMC6249505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A machine learning-based framework to identify type 2 diabetes through electronic health records</article-title>
          <source>Int J Med Inform</source>
          <year>2017</year>
          <month>01</month>
          <volume>97</volume>
          <fpage>120</fpage>
          <lpage>127</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27919371"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2016.09.014</pub-id>
          <pub-id pub-id-type="medline">27919371</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(16)30215-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5144921</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Juhn</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence approaches using natural language processing to advance EHR-based clinical research</article-title>
          <source>J Allergy Clin Immunol</source>
          <year>2020</year>
          <month>02</month>
          <volume>145</volume>
          <issue>2</issue>
          <fpage>463</fpage>
          <lpage>469</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31883846"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jaci.2019.12.897</pub-id>
          <pub-id pub-id-type="medline">31883846</pub-id>
          <pub-id pub-id-type="pii">S0091-6749(19)32604-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7771189</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Norgeot</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Muenzen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Schenk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rutenberg</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Oskotsky</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sirota</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yazdany</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schmajuk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ludwig</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Protected Health Information filter (Philter): accurately and securely de-identifying free-text clinical notes</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <month>04</month>
          <day>14</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>57</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-0258-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0258-y</pub-id>
          <pub-id pub-id-type="medline">32337372</pub-id>
          <pub-id pub-id-type="pii">258</pub-id>
          <pub-id pub-id-type="pmcid">PMC7156708</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
