<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i8e20773</article-id>
      <article-id pub-id-type="pmid">32759101</article-id>
      <article-id pub-id-type="doi">10.2196/20773</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Natural Language Processing for Rapid Response to Emergent Diseases: Case Study of Calcium Channel Blockers and Hypertension in the COVID-19 Pandemic</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kilicoglu</surname>
            <given-names>Halil</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zheng</surname>
            <given-names>Shuai</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pförringer</surname>
            <given-names>Dominik</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shah</surname>
            <given-names>Nigam</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Neuraz</surname>
            <given-names>Antoine</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>Necker-Enfants Malades Hospital</institution>
            <institution>Assistance Publique – Hôpitaux de Paris (AP-HP)</institution>
            <addr-line>Bat Imagine, Bureau 145</addr-line>
            <addr-line>149 rue de Sèvres</addr-line>
            <addr-line>Paris, 75015</addr-line>
            <country>France</country>
            <phone>33 0624622355</phone>
            <email>antoine.neuraz@aphp.fr</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7142-6728</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Lerner</surname>
            <given-names>Ivan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5466-1707</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Digan</surname>
            <given-names>William</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7290-3282</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Paris</surname>
            <given-names>Nicolas</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1533-5087</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Tsopra</surname>
            <given-names>Rosy</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9406-5547</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Rogier</surname>
            <given-names>Alice</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5499-3197</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Baudoin</surname>
            <given-names>David</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1552-8356</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Cohen</surname>
            <given-names>Kevin Bretonnel</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1749-8290</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Burgun</surname>
            <given-names>Anita</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6855-4366</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Garcelon</surname>
            <given-names>Nicolas</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3326-2811</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Rance</surname>
            <given-names>Bastien</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4417-1197</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <collab>AP-HP/Universities/INSERM COVID-19 Research Collaboration; AP-HP COVID CDR Initiative</collab>
          <xref rid="aff8" ref-type="aff">8</xref>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>Necker-Enfants Malades Hospital</institution>
        <institution>Assistance Publique – Hôpitaux de Paris (AP-HP)</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Centre de Recherche des Cordeliers</institution>
        <institution>INSERM UMRS 1138 Team 22</institution>
        <institution>Université de Paris</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>LIMSI</institution>
        <institution>CNRS</institution>
        <institution>Université Paris Saclay</institution>
        <addr-line>Orsay</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>Georges Pompidou European Hospital</institution>
        <institution>Assistance Publique – Hôpitaux de Paris (AP-HP)</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>DSI WIND</institution>
        <institution>Assistance Publique – Hôpitaux de Paris (AP-HP)</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>School of Medicine</institution>
        <institution>University of Colorado</institution>
        <addr-line>Denver, CO</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Institut Imagine, INSERM U1163</institution>
        <institution>Université Paris Descartes</institution>
        <institution>Université de Paris</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Please see acknowledgements for list of collaborators</institution>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Antoine Neuraz <email>antoine.neuraz@aphp.fr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>14</day>
        <month>8</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>8</issue>
      <elocation-id>e20773</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>6</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>23</day>
          <month>6</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>26</day>
          <month>7</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Antoine Neuraz, Ivan Lerner, William Digan, Nicolas Paris, Rosy Tsopra, Alice Rogier, David Baudoin, Kevin Bretonnel Cohen, Anita Burgun, Nicolas Garcelon, Bastien Rance,  AP-HP/Universities/INSERM COVID-19 Research Collaboration; AP-HP COVID CDR Initiative. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 14.08.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/8/e20773/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>A novel disease poses special challenges for informatics solutions. Biomedical informatics relies for the most part on structured data, which require a preexisting data or knowledge model; however, novel diseases do not have preexisting knowledge models. In an emergent epidemic, language processing can enable rapid conversion of unstructured text to a novel knowledge model. However, although this idea has often been suggested, no opportunity has arisen to actually test it in real time. The current coronavirus disease (COVID-19) pandemic presents such an opportunity.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to evaluate the added value of information from clinical text in response to emergent diseases using natural language processing (NLP).</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We explored the effects of long-term treatment by calcium channel blockers on the outcomes of COVID-19 infection in patients with high blood pressure during in-patient hospital stays using two sources of information: data available strictly from structured electronic health records (EHRs) and data available through structured EHRs and text mining.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In this multicenter study involving 39 hospitals, text mining increased the statistical power sufficiently to change a negative result for an adjusted hazard ratio to a positive one. Compared to the baseline structured data, the number of patients available for inclusion in the study increased by 2.95 times, the amount of available information on medications increased by 7.2 times, and the amount of additional phenotypic information increased by 11.9 times.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>In our study, use of calcium channel blockers was associated with decreased in-hospital mortality in patients with COVID-19 infection. This finding was obtained by quickly adapting an NLP pipeline to the domain of the novel disease; the adapted pipeline still performed sufficiently to extract useful information. When that information was used to supplement existing structured data, the sample size could be increased sufficiently to see treatment effects that were not previously statistically detectable.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>medication information</kwd>
        <kwd>natural language processing</kwd>
        <kwd>electronic health records</kwd>
        <kwd>COVID-19</kwd>
        <kwd>public health</kwd>
        <kwd>response</kwd>
        <kwd>emergent disease</kwd>
        <kwd>informatics</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Outbreaks of novel diseases can create enormous strain on public health systems. Since the time of Snow's pioneering work [<xref ref-type="bibr" rid="ref1">1</xref>] on the epidemiology of the London cholera outbreak of 1854, it has been clear that information is key to the successful abatement of these substantial public health challenges. Currently, health care systems have access to quantities of data that would have been unimaginable in Snow’s time. Because these data are in electronic format, they can be manipulated and exploited rapidly. However, a novel disease poses special challenges for informatics solutions. Biomedical informatics relies for the most part on structured data; structured data require a preexisting data or knowledge model; and a novel disease will not have a preexisting knowledge model. This poses a formidable obstacle to leveraging informatics solutions to address the type of public health crisis the world is facing at the time of writing. One solution to the lack of structured information is natural language processing (NLP).</p>
      <p>Biomedical text mining, or the use of textual data in electronic health records (EHRs), has often been proposed as a method for converting unstructured data to the structured data that are needed in public health informatics. One of the advantages of biomedical text mining is that it can be developed rapidly [<xref ref-type="bibr" rid="ref2">2</xref>], which can permit the leveraging of electronic health records of patients with a novel disease as quickly as they are entered into the EHR. However, although this has often been suggested [<xref ref-type="bibr" rid="ref3">3</xref>], there has never been an opportunity to actually test that claim in real time. Thus, the current novel coronavirus disease (COVID-19) pandemic, with all of its challenges, presents an opportunity to advance the state of public health informatics. In this paper, we tested this possibility with a case study on the effects of use of calcium channel blockers (CCBs) in patients with high blood pressure on the risk of death from COVID-19 infection. An association between CCB and the outcome of COVID-19 infection has already been suggested [<xref ref-type="bibr" rid="ref4">4</xref>] but has not previously been explored in a large multicenter clinical study.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source and NLP Pipeline</title>
        <p>The data used in this study were obtained from 39 different hospitals in the Paris metropolitan area in the Assistance Publique – Hôpitaux de Paris (AP-HP) system. Focusing on this region of the country and on a large number of hospitals afforded a diversity of patient demographics that would not be available in most other parts of the country. As of May 4, 2020, the Entrepôt de Données de Santé (EDS)-COVID data set contained 84,966 electronic records of patients with suspected or confirmed COVID-19 (see <xref ref-type="table" rid="table1">Table 1</xref> for further details on the data set). The records comprise structured fields and free text documents, including clinical notes and narratives. Most of the textual documents do not follow a specific structure and contain different types of patient information, such as patient history, family history, laboratory results, drug history, and prescriptions. Therefore, they represent an excellent test case for the real abilities of text mining. We used the following pipeline:</p>
        <list list-type="bullet">
          <list-item>
            <p>Typical preprocessing steps (ie, text cleaning and sentence detection) were applied to the full data set (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for a detailed description).</p>
          </list-item>
          <list-item>
            <p>Drug names and details of administration (dose, route of administration, frequency, and duration) were extracted via a deep learning approach based on bidirectional encoder representations from transformers (BERT) contextual embeddings [<xref ref-type="bibr" rid="ref5">5</xref>] (NLP Medication).</p>
          </list-item>
          <list-item>
            <p>Specific phenotypes associated with COVID-19 (eg, obesity, smoking status), scores (eg, sequential organ failure assessment score), and physiological measures (eg, BMI) were extracted via a list of 60 regular expressions (NLP RegExp).</p>
          </list-item>
          <list-item>
            <p>All signs, symptoms, and comorbidities included in the Unified Medical Language System (UMLS) [<xref ref-type="bibr" rid="ref6">6</xref>] were extracted with the quickUMLS algorithm [<xref ref-type="bibr" rid="ref7">7</xref>] (NLP UMLS).</p>
          </list-item>
        </list>
        <p>A visual depiction of the pipeline is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        <p>The NLP medication extraction model was a bidirectional long short-term memory with a conditional random field (BiLSTM-CRF) [<xref ref-type="bibr" rid="ref8">8</xref>] layer on top of a vector representation of tokens using BERT [<xref ref-type="bibr" rid="ref5">5</xref>]. We fine-tuned multilingual BERT on a set of 10 million clinical texts from EHRs. The model was trained on the APMed corpus, a manually annotated corpus of French clinical texts described in [<xref ref-type="bibr" rid="ref9">9</xref>]. We used the FLAIR [<xref ref-type="bibr" rid="ref10">10</xref>] implementation with 2 layers of 1024 units for the LSTMs with an asynchronous stochastic gradient descent (ASGD) optimizer and a reduction of the learning rate on plateau.</p>
        <p>The NLP regular expression method for the extraction of specific phenotypes comprised a set of 60 regular expressions developed manually and iteratively by medical informatics experts and physicians. We evaluated their precision at the sentence level using a random sample of 100 positive sentences for each regular expression. Examples of these expressions can be found in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <p>All the terms extracted by the NLP pipeline, regardless of the method, were automatically annotated according to their modality (negated or hypothetical) and experiencer in the text, as described in previous work [<xref ref-type="bibr" rid="ref11">11</xref>]. The outputs of the NLP pipeline were normalized to the Observational Medical Outcomes Partnership (OMOP) common data model (CDM) [<xref ref-type="bibr" rid="ref12">12</xref>] and were fed back to the database system on a daily basis.</p>
      </sec>
      <sec>
        <title>Data Availability</title>
        <p>Data supporting this study can be made available on request, on condition that the research project is accepted by the scientific and ethics committee of the AP-HP health data warehouse [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
      </sec>
      <sec>
        <title>Clinical Application: Long-Term CCB Use and Outcomes of COVID-19 in Patients With High Blood Pressure</title>
        <p>The clinical goal of this case study was to evaluate the potential effects of CCBs on in-hospital mortality related to COVID-19 [<xref ref-type="bibr" rid="ref4">4</xref>]. To achieve this goal, we used two different sources of data. The first source was two elements of structured data: International Classification of Disease, Tenth Revision (ICD-10) codes and medication prescriptions from an electronic prescription system. The second source was information on medications and comorbidities extracted by the NLP pipeline from nonstructured fields in the EHR. The inclusion criterion for patients was COVID-19 disease confirmed by reverse transcriptase–polymerase chain reaction (RT-PCR).</p>
        <p>We considered a patient as receiving long-term treatment with CCBs (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>) if there were at least two mentions (in structured data or extracted with NLP, respectively) in the last 6 months. We qualified cases as having comorbidities through one occurrence of an ICD-10 code (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>) or two NLP mentions in the last 6 months.</p>
        <p>The measured outcome was in-hospital mortality. We used a multivariate Cox proportional hazard model [<xref ref-type="bibr" rid="ref14">14</xref>] that was adjusted according to age, gender, and the presence of obesity, diabetes, and cancer. The level of significance was set as <italic>P</italic>=.05, and all statistical tests were two-sided. We used R statistical software v.3.6.2 (R Project) with the Survival package.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>NLP Pipeline</title>
        <p>As <xref ref-type="table" rid="table1">Table 1</xref> shows, NLP markedly expanded the quantity of medication and phenotype information available for the analysis. The number of data points for medication increased by 7.2 times (<italic>NLP medication</italic>)⁄(<italic>structured medication</italic>), and the number of phenotypes increased by 15.2 times (<italic>NLP RegExp</italic> + <italic>NLP UMLS</italic>)⁄(<italic>ICD-10 codes</italic>). Among the 84,966 patients with records present in the EDS-COVID cohort (<xref ref-type="table" rid="table1">Table 1</xref>), 45,593 (53.7%) had drug information in their narrative EHR documents, whereas only 19,791 (23.3%) of the patients had medication information available in the structured fields in the EHR.</p>
        <p>For specific phenotypes with existing ICD-10 codes (<xref ref-type="fig" rid="figure1">Figure 1</xref>), information was only available in clinical free-text fields for the majority of patients: 7133/8526 (60.2%) for diabetes, and 2138/2871 (74.5%) for obesity. Some items were absent from the structured data but could be recovered using the NLP extraction pipeline, such as the COVID-19–specific symptoms ageusia (2449 patients) and anosmia (2732 patients).</p>
        <p>In terms of quality, the extraction of medication names showed an F1 score of 93.8% (91.6% after normalization) in all sections. When focusing on the admission and discharge treatment sections, the F1 score was 96.7% (96.0% after normalization). The detailed results are shown in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>. Regarding the phenotypes extracted by regular expressions in our case study, hypertension showed a precision of 99%, and obesity, diabetes, and cancer showed precisions of 94%, 80%, and 91%, respectively.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Description of the information extracted using the NLP pipeline in the EDS-COVID cohort (N=84,966).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="270"/>
            <col width="320"/>
            <col width="210"/>
            <thead>
              <tr valign="top">
                <td>Source</td>
                <td>Patient records (N=84,966), n (%)</td>
                <td>Documents (N=1,524,057), n (%)</td>
                <td>Data points, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>NLP<sup>a</sup> Medication</td>
                <td>45,593 (53.7)</td>
                <td>696,125 (45.7)</td>
                <td>5,995,945</td>
              </tr>
              <tr valign="top">
                <td>NLP RegExp<sup>b</sup></td>
                <td>44,498 (52.4)</td>
                <td>711,900 (46.7)</td>
                <td>5,449,932</td>
              </tr>
              <tr valign="top">
                <td>NLP UMLS<sup>c</sup></td>
                <td>44,035 (51.8)</td>
                <td>833,610 (54.7)</td>
                <td>19,626,172</td>
              </tr>
              <tr valign="top">
                <td>Structured medication</td>
                <td>19,791 (23.3)</td>
                <td>N/A<sup>d</sup></td>
                <td>826,554</td>
              </tr>
              <tr valign="top">
                <td>ICD-10<sup>e</sup> codes</td>
                <td>38,993 (45.9)</td>
                <td>N/A</td>
                <td>1,643,819</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>RegExp: regular expression.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>UMLS: Unified Medical Language System.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>ICD-10: International Classification of Disease, Tenth Revision.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Quantity of patients with information for a selection of items depending on the source of data.</p>
          </caption>
          <graphic xlink:href="jmir_v22i8e20773_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Case Study</title>
        <p>Of the 84,966 total patients, 3965 (4.7%) were included using the NLP pipeline, of whom only 1343 (15.9%) could be included if the study were limited to the use of structured data; thus, the number of patients available for the case study increased by 2.95 times (<xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>). A detailed description of the population of patients who tested positive for COVID-19 with a history of high blood pressure can be found in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>. In terms of the temporal depth of CCB treatment information, <xref ref-type="fig" rid="figure2">Figure 2</xref> shows that a higher volume of information was obtained from text fields compared to structured data.</p>
        <p>When using only structured data, we observed an adjusted hazard ratio (aHR) of 0.83 (95% CI 0.67-1.05) for treatment with CCBs; this result was not statistically significant (<italic>P</italic>=.12). When including NLP data, the aHR became 0.82 (95% CI 0.71-0.94), which represents a statistically significant reduction of the risk of death (<italic>P</italic>=.005). Similar results can be observed that support an increased risk of mortality with the presence of diabetes and cancer as comorbidities (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Quantity of information about calcium channel blockers for the two data sources over time. NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="jmir_v22i8e20773_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Results of the multivariate Cox survival model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="280"/>
            <col width="120"/>
            <col width="130"/>
            <col width="120"/>
            <col width="90"/>
            <col width="130"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristic</td>
                <td colspan="3">Structured data</td>
                <td colspan="3">NLP<sup>a</sup></td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>aHR<sup>b</sup></td>
                <td>95% CI</td>
                <td><italic>P</italic> value</td>
                <td>HR<sup>c</sup></td>
                <td>95% CI</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Calcium channel blockers</td>
                <td>0.83</td>
                <td>0.67-1.05</td>
                <td>.12</td>
                <td>0.82</td>
                <td>0.71-0.94</td>
                <td>.005</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Age (years)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>45-64</td>
                <td>Reference</td>
                <td>N/A<sup>d</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>18-44</td>
                <td>0.20</td>
                <td>0.03-1.46</td>
                <td>.11</td>
                <td>0.35</td>
                <td>0.15-0.80</td>
                <td>.01</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>65-74</td>
                <td>1.50</td>
                <td>0.99-2.27</td>
                <td>.053</td>
                <td>1.95</td>
                <td>1.54-2.47</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>75-84</td>
                <td>1.68</td>
                <td>1.14-2.48</td>
                <td>.009</td>
                <td>2.94</td>
                <td>2.35-3.69</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>85+</td>
                <td>2.45</td>
                <td>1.66-3.61</td>
                <td>&#60;.001</td>
                <td>3.99</td>
                <td>3.16-5.03</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Gender</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>Reference</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>1.59</td>
                <td>1.27-2.00</td>
                <td>&#60;.001</td>
                <td>1.53</td>
                <td>1.32-1.77</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Obesity</td>
                <td>1.07</td>
                <td>0.81-1.42</td>
                <td>.60</td>
                <td>1.13</td>
                <td>0.90-1.41</td>
                <td>.30</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Diabetes</td>
                <td>1.22</td>
                <td>0.98-1.52</td>
                <td>.08</td>
                <td>1.25</td>
                <td>1.09-1.45</td>
                <td>.002</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Cancer</td>
                <td>1.20</td>
                <td>0.96-1.49</td>
                <td>.11</td>
                <td>1.34</td>
                <td>1.15-1.56</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>NLP: natural language processing.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>aHR: adjusted hazard ratio.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>HR: hazard ratio.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>In this paper, we investigated the potential utility of biomedical NLP in the context of a rapidly emerging novel disease. To do this, we asked a specific question: Does the leveraging of unstructured textual information via NLP yield clinically actionable information? To answer this question, we used NLP to extract information about hypertension and a medication for treating it from the EHRs of patients with COVID-19. The results showed that an NLP pipeline can be adapted quickly to the domain of a novel disease, it can perform well enough to extract useful information, and when that information is used to supplement the structured data that is already available, the sample size can be increased sufficiently to see treatment effects that were not previously statistically detectable.</p>
      <p>Several agencies, notably the European Medicines Agency, have highlighted the benefits of using real-world data for research, in particular for the generation of complementary evidence and new hypotheses [<xref ref-type="bibr" rid="ref15">15</xref>]. During the peak of the COVID-19 pandemic, the time available for clinicians to enter EHR data was greatly reduced. Medical informatics became vital to manage the crisis in hospitals and acquire better knowledge of the disease. The NLP pipeline was implemented within two weeks at the beginning of the COVID-19 epidemic in France, building on previous developments in artificial intelligence and text mining at AP-HP. More specifically, combining nonspecific preexisting developments (eg, negation, family history, and hypothesis detection) with tailored extractions (ie, regular expressions) allowed us to obtain rapid results of sufficient quality.</p>
      <p>Approximately 60 internal research projects exploring EDS-COVID data were submitted for Institutional Review Board approval within the first eight weeks of the COVID-19 epidemic. More than half of these projects studied variables such as symptoms (eg, ageusia), radiological signs (eg, crazy paving), comorbidities (eg, obesity), and drug history (eg, hydroxychloroquine), requiring extraction of information from narrative reports in EHRs.</p>
      <p>The case study described in this paper shows the possible impact of using information extracted from text in the EHR for COVID-19 research. More precisely, the conclusions of the above study would have been different if information from unstructured fields had been excluded. In our case study, the addition of information from NLP did not dramatically change the hazard ratio from the analyses; however, it allowed us to include more patients and therefore narrowed the CIs and increased the statistical power. Note that the increased statistical power is mainly due to the increase in the number of patients included and the quantity of data available. Further analyses are required to assess the validity of the associations detected here, given that some confounding biases may remain and provoke false positive results. Reproducing the analysis with an external population or performing falsification testing [<xref ref-type="bibr" rid="ref16">16</xref>] could help improve the validity of these findings.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary methods.</p>
        <media xlink:href="jmir_v22i8e20773_app1.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Description of the natural language processing pipeline.</p>
        <media xlink:href="jmir_v22i8e20773_app2.docx" xlink:title="DOCX File , 53 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Examples of regular expression for the extraction of phenotypes.</p>
        <media xlink:href="jmir_v22i8e20773_app3.docx" xlink:title="DOCX File , 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Definition of calcium channel blockers (name, ATC number).</p>
        <media xlink:href="jmir_v22i8e20773_app4.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Definition of phenotypes (name, ICD-10 code).</p>
        <media xlink:href="jmir_v22i8e20773_app5.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Performance of the medication information extraction model before and after normalization of the entities.</p>
        <media xlink:href="jmir_v22i8e20773_app6.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Flowchart of the use case: patients who tested positive for COVID-19 who have hypertension.</p>
        <media xlink:href="jmir_v22i8e20773_app7.docx" xlink:title="DOCX File , 227 KB"/>
      </supplementary-material>
      <supplementary-material id="app8">
        <label>Multimedia Appendix 8</label>
        <p>Characteristics of the population of COVID positive patients with hypertension in EDS-COVID.</p>
        <media xlink:href="jmir_v22i8e20773_app8.docx" xlink:title="DOCX File , 13 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">aHR</term>
          <def>
            <p>adjusted hazard ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AP-HP</term>
          <def>
            <p>Assistance Publique – Hôpitaux de Paris</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ASGD</term>
          <def>
            <p>asynchronous stochastic gradient descent</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BiLSTM-CRF</term>
          <def>
            <p>bidirectional long short-term memory with a conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CCB</term>
          <def>
            <p>calcium channel blocker</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CDM</term>
          <def>
            <p>common data model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">COVID-19</term>
          <def>
            <p>coronavirus disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">EDS</term>
          <def>
            <p>Entrepôt de Données de Santé</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ICD-10</term>
          <def>
            <p>International Classification of Diseases, Tenth Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">RT-PCR</term>
          <def>
            <p>reverse transcriptase–polymerase chain reaction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">OMOP</term>
          <def>
            <p>Observational Medical Outcomes Partnership</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank the EDS AP-HP COVID consortium integrating the AP-HP Health Data Warehouse team as well as all the AP-HP staff and volunteers who contributed to the implementation of the EDS-COVID database and operating solutions for the database. The authors would like to acknowledge John Bennett for his thorough editing. This work was supported by state funding from the French National Research Agency (Agence Nationale de la Recherche, ANR) under the “Investissements d’Avenir” program (reference: ANR-10-IAHU-01) and an ANR PractikPharma grant (ANR-15-CE23-0028). The collaborators associated with AP-HP/Universities/INSERM COVID-19 Research Collaboration: AP-HP COVID CDR Initiative, Paris, France, are as follows: Pierre-Yves Ancel, Alain Bauchet, Nathanaël Beeker, Vincent Benoit, Mélodie Bernaux, Ali Bellamine, Romain Bey, Aurélie Bourmaud, Stéphane Breant, Anita Burgun, Fabrice Carrat, Charlotte Caucheteux, Julien Champ, Sylvie Cormont, Christel Daniel, Julien Dubiel, Catherine Duclos, Loic Esteve, Marie Frank, Nicolas Garcelon, Alexandre Gramfort, Nicolas Griffon, Olivier Grisel, Martin Guilbaud, Claire Hassen-Khodja, François Hemery, Martin Hilka, Anne Sophie Jannot, Jerome Lambert, Richard Layese, Judith Leblanc, Léo Lebouter, Guillaume Lemaitre, Damien Leprovost, Ivan Lerner, Kankoe Levi Sallah, Aurélien Maire, Marie-France Mamzer, Patricia Martel, Arthur Mensch, Thomas Moreau, Antoine Neuraz, Nina Orlova, Nicolas Paris, Bastien Rance, Hélène Ravera, Antoine Rozes, Elisa Salamanca, Arnaud Sandrin, Patricia Serre, Xavier Tannier, Jean-Marc Treluyer, Damien van Gysel, Gaël Varoquaux, Jill Jen Vie, Maxime Wack, Perceval Wajsburt, Demian Wassermann and Eric Zapletal.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>AN, IL, AB, NG, and BR contributed to the conception or design of the work. AN, IL, WD, NP, RT, NG, and BR acquired, analyzed, or interpreted the data. AN, IL, WD, NP, AR, DB, NG, and BR created the new software used in the work. AN, IL, AB, NG, RT, BR, and KBC drafted the work or substantively revised it.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Snow</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>On the Mode of Communication of Cholera</source>
          <year>1855</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>Wilson and Ogilvy</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ivanov</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Gesteland</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Olszewski</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Espino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Evaluating natural language processing applications applied to outbreak and disease surveillance</article-title>
          <source>Proceedings of 36th symposium on the interface: computing science and statistics 2004</source>
          <year>2004</year>
          <conf-name>36th Symposium on the Interface: Computing Science and Statistics 2004</conf-name>
          <conf-date>May 26-29, 2004</conf-date>
          <conf-loc>Baltimore, MD</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elkin</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Froehling</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Wahner-Roedler</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>KR</given-names>
            </name>
          </person-group>
          <article-title>Comparison of natural language processing biosurveillance methods for identifying influenza from encounter notes</article-title>
          <source>Ann Intern Med</source>
          <year>2012</year>
          <month>01</month>
          <day>03</day>
          <volume>156</volume>
          <issue>1 Pt 1</issue>
          <fpage>11</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-156-1-201201030-00003</pub-id>
          <pub-id pub-id-type="medline">22213490</pub-id>
          <pub-id pub-id-type="pii">156/1_Part_1/11</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Shang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Calcium channel blocker amlodipine besylate is associated with reduced case fatality rate of COVID-19 patients with hypertension</article-title>
          <source>medRxiv</source>
          <year>2020</year>
          <month>04</month>
          <day>14</day>
          <fpage>preprint</fpage>
          <pub-id pub-id-type="doi">10.1101/2020.04.08.20047134</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <month>10</month>
          <day>10</day>
          <access-date>2018-11-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1810.04805">http://arxiv.org/abs/1810.04805</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lindberg</surname>
              <given-names>DAB</given-names>
            </name>
            <name name-style="western">
              <surname>Humphreys</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>McCray</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System</article-title>
          <source>Methods Inf Med</source>
          <year>2018</year>
          <month>02</month>
          <day>06</day>
          <volume>32</volume>
          <issue>04</issue>
          <fpage>281</fpage>
          <lpage>291</lpage>
          <pub-id pub-id-type="doi">10.1055/s-0038-1634945</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Okazaki</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujii</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Simple and Efficient Algorithm for Approximate Dictionary Matching</article-title>
          <source>Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)</source>
          <year>2010</year>
          <conf-name>23rd International Conference on Computational Linguistics (Coling 2010)</conf-name>
          <conf-date>August 2010</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <publisher-name>Coling 2010 Organizing Committee</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/C10-1096"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lample</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ballesteros</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawakami</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dyer</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Neural Architectures for Named Entity Recognition</article-title>
          <source>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2016</year>
          <conf-name>2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 2016</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>260</fpage>
          <lpage>270</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/n16-1030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jouffroy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lerner</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Rance</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Burgun</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Neuraz</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>MedExt: combining expert knowledge and deep learning for medication extraction from French clinical texts</article-title>
          <source>ResearchGate</source>
          <year>2020</year>
          <month>01</month>
          <fpage>preprint</fpage>
          <pub-id pub-id-type="doi">10.2196/preprints.17934</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Akbik</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bergmann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Blythe</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rasul</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schweter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vollgraf</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>FLAIR: An Easy-to-Use Framework for State-of-the-Art NLP</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations)</source>
          <year>2019</year>
          <conf-name>2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations)</conf-name>
          <conf-date>June 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <publisher-loc>Minneapolis, MN</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <pub-id pub-id-type="doi">10.18653/v1/n19-4010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garcelon</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Neuraz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Benoit</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Salomon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Burgun</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Improving a full-text search engine: the importance of negation detection and family history context to identify cases in a biomedical data warehouse</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>05</month>
          <day>01</day>
          <volume>24</volume>
          <issue>3</issue>
          <fpage>607</fpage>
          <lpage>613</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw144</pub-id>
          <pub-id pub-id-type="medline">28339516</pub-id>
          <pub-id pub-id-type="pii">2433511</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Huser</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Suchard</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>ICK</given-names>
            </name>
            <name name-style="western">
              <surname>Rijnbeek</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>van der Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Norén</surname>
              <given-names>GN</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Stang</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Madigan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
          </person-group>
          <article-title>Observational Health Data Sciences and Informatics (OHDSI): Opportunities for Observational Researchers</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2015</year>
          <volume>216</volume>
          <fpage>574</fpage>
          <lpage>578</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26262116"/>
          </comment>
          <pub-id pub-id-type="medline">26262116</pub-id>
          <pub-id pub-id-type="pmcid">PMC4815923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <article-title>Soumettre un projet de recherche au Comité Scientifique et Ethique de l’Entrepôt de Données de Santé</article-title>
          <source>Assistance Publique — Hôpitaux de Paris</source>
          <access-date>2020-08-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://recherche.aphp.fr/eds/recherche/">https://recherche.aphp.fr/eds/recherche/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>Regression Models and Life-Tables</article-title>
          <source>J R Stat Soc Series B Stat Methodol</source>
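          <!-- NOTE(review): previously dated 2018-12-05, presumably the online re-posting date; the DOI and volume indicate original publication in 1972 - confirm. -->
          <year>1972</year>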
          <volume>34</volume>
          <issue>2</issue>
          <fpage>187</fpage>
          <lpage>202</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1972.tb00899.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>EMA Regulatory Science to 2025: Strategic reflection</article-title>
          <source>European Medicines Agency</source>
          <year>2018</year>
          <access-date>2020-08-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ema.europa.eu/en/documents/regulatory-procedural-guideline/ema-regulatory-science-2025-strategic-reflection_en.pdf">https://www.ema.europa.eu/en/documents/regulatory-procedural-guideline/ema-regulatory-science-2025-strategic-reflection_en.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pizer</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>Falsification Testing of Instrumental Variables Methods for Comparative Effectiveness Research</article-title>
          <source>Health Serv Res</source>
          <year>2016</year>
          <month>04</month>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>790</fpage>
          <lpage>811</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26293167"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/1475-6773.12355</pub-id>
          <pub-id pub-id-type="medline">26293167</pub-id>
          <pub-id pub-id-type="pmcid">PMC4799892</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
