<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v19i5e162</article-id>
    <article-id pub-id-type="pmid">28506958</article-id>
    <article-id pub-id-type="doi">10.2196/jmir.6887</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Enhancing Comparative Effectiveness Research With Automated Pediatric Pneumonia Detection in a Multi-Institutional Clinical Repository: A PHIS+ Pilot Study</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Parra-Calderón</surname>
          <given-names>Carlos Luis</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Cruz-Díaz</surname>
          <given-names>Noa Patricia</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>op den Buijs</surname>
          <given-names>Jorn</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes">
      <name name-style="western">
        <surname>Meystre</surname>
        <given-names>Stephane</given-names>
      </name>
      <degrees>MD, PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>Medical University of South Carolina</institution>
        <addr-line>135 Cannon St, 4th Floor</addr-line>
        <addr-line>Charleston, SC</addr-line>
        <country>United States</country>
        <phone>1 843 792 0015</phone>
        <fax>1 843 792 5587</fax>
        <email>meystre@musc.edu</email>
      </address>  
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7632-9625</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Gouripeddi</surname>
          <given-names>Ramkiran</given-names>
        </name>
        <degrees>MBBS, MS</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-4345-9669</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Tieder</surname>
          <given-names>Joel</given-names>
        </name>
        <degrees>MPH, MD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6539-7115</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Simmons</surname>
          <given-names>Jeffrey</given-names>
        </name>
        <degrees>MSc, MD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-5069-4491</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Srivastava</surname>
          <given-names>Rajendu</given-names>
        </name>
        <degrees>MPH, MD</degrees>
        <xref rid="aff5" ref-type="aff">5</xref>
        <xref rid="aff6" ref-type="aff">6</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-0649-6222</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib6">
        <name name-style="western">
          <surname>Shah</surname>
          <given-names>Samir</given-names>
        </name>
        <degrees>MSCE, MD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7902-7000</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
      <sup>1</sup>
      <institution>Medical University of South Carolina</institution>
      <addr-line>Charleston, SC</addr-line>
      <country>United States</country>
    </aff>
    <aff id="aff2">
    <sup>2</sup>
    <institution>Department of Biomedical Informatics</institution>
    <institution>University of Utah</institution>  
    <addr-line>Salt Lake City, UT</addr-line>
    <country>United States</country></aff>
    <aff id="aff3">
      <sup>3</sup>
      <institution>Seattle Children’s Hospital and University of Washington</institution>
      <addr-line>Seattle, WA</addr-line>
      <country>United States</country>
    </aff>
    <aff id="aff4">
      <sup>4</sup>
      <institution>Cincinnati Children’s Hospital Medical Center</institution>
      <addr-line>Cincinnati, OH</addr-line>
      <country>United States</country>
    </aff>
    <aff id="aff5">
    <sup>5</sup>
    <institution>Department of Pediatrics</institution>
    <institution>University of Utah</institution>  
    <addr-line>Salt Lake City, UT</addr-line>
    <country>United States</country></aff>
    <aff id="aff6">
      <sup>6</sup>
      <institution>Primary Children’s Hospital</institution>
      <addr-line>Salt Lake City, UT</addr-line>
      <country>United States</country>
    </aff>
    <author-notes>
      <corresp>Corresponding Author: Stephane Meystre 
      <email>meystre@musc.edu</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><month>05</month><year>2017</year></pub-date>
    <pub-date pub-type="epub">
      <day>15</day>
      <month>05</month>
      <year>2017</year>
    </pub-date>
    <volume>19</volume>
    <issue>5</issue>
    <elocation-id>e162</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>25</day>
        <month>10</month>
        <year>2016</year>
      </date>
      <date date-type="rev-request">
        <day>18</day>
        <month>12</month>
        <year>2016</year>
      </date>
      <date date-type="rev-recd">
        <day>26</day>
        <month>1</month>
        <year>2017</year>
      </date>
      <date date-type="accepted">
        <day>6</day>
        <month>3</month>
        <year>2017</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Stephane Meystre, Ramkiran Gouripeddi, Joel Tieder, Jeffrey Simmons, Rajendu Srivastava, Samir Shah. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 15.05.2017.</copyright-statement>
    <copyright-year>2017</copyright-year>
    <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/2.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://www.jmir.org/2017/5/e162/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>Community-acquired pneumonia is a leading cause of pediatric morbidity. Administrative data are often used to conduct comparative effectiveness research (CER) with sufficient sample sizes to enhance detection of important outcomes. However, such studies are prone to misclassification errors because of the variable accuracy of discharge diagnosis codes.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>The aim of this study was to develop an automated, scalable, and accurate method to determine the presence or absence of pneumonia in children using chest imaging reports.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>The multi-institutional PHIS+ clinical repository was developed to support pediatric CER by expanding an administrative database of children’s hospitals with detailed clinical data. To develop a scalable approach to find patients with bacterial pneumonia more accurately, we developed a Natural Language Processing (NLP) application to extract relevant information from chest diagnostic imaging reports. Domain experts established a reference standard by manually annotating 282 reports to train and then test the NLP application. Findings of pleural effusion, pulmonary infiltrate, and pneumonia were automatically extracted from the reports and then used to automatically classify whether a report was consistent with bacterial pneumonia.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>Compared with the annotated diagnostic imaging reports reference standard, the most accurate implementation of machine learning algorithms in our NLP application allowed extracting relevant findings with a sensitivity of .939 and a positive predictive value of .925. It allowed classifying reports with a sensitivity of .71, a positive predictive value of .86, and a specificity of .962. When compared with each of the domain experts manually annotating these reports, the NLP application allowed for significantly higher sensitivity (.71 vs .527) and similar positive predictive value and specificity.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>NLP-based pneumonia information extraction of pediatric diagnostic imaging reports performed better than domain experts in this pilot study. NLP is an efficient method to extract information from a large collection of imaging reports to facilitate CER.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>natural language processing</kwd>
      <kwd>pneumonia, bacterial</kwd>
      <kwd>medical informatics</kwd>
      <kwd>comparative effectiveness research</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Community-acquired pneumonia (CAP) is a leading cause of hospitalization among children in the United States [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Despite this prevalence, the effectiveness of common management strategies [<xref ref-type="bibr" rid="ref3">3</xref>] is unknown. Multicenter studies using administrative data are inexpensive to conduct and could help compare treatment effectiveness and overcome the challenge of measuring adverse outcomes [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. However, these studies are limited by the potential for subject misclassification. International Classification of Diseases, 9th revision, Clinical Modification (ICD-9-CM) discharge diagnosis codes are commonly used to identify patients [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Improper use of these codes may lead to false positive or false negative cases [<xref ref-type="bibr" rid="ref6">6</xref>]. In studies of pediatric CAP, this might lead to systematic biasing by inadvertently including patients without pneumonia or excluding patients with pneumonia in the study cohort [<xref ref-type="bibr" rid="ref7">7</xref>]. Furthermore, use of these discharge diagnosis codes only precludes more accurate risk adjustment than might be available through admission chest radiograph results, for example [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>The PHIS+ repository augments the Pediatric Health Information System (PHIS), an administrative database from the Children’s Hospital Association, with clinical data [<xref ref-type="bibr" rid="ref9">9</xref>]. PHIS+ consists of laboratory [<xref ref-type="bibr" rid="ref9">9</xref>] and microbiological testing results [<xref ref-type="bibr" rid="ref10">10</xref>], as well as imaging reports from 6 pediatric hospitals across multiple care settings (inpatient, outpatient, emergency department, and ambulatory surgery) over a 5-year study period. The clinical data in the PHIS+ repository are standardized and harmonized using biomedical terminologies and common data models. But, unlike laboratory results, which are available in discrete formats for comparative effectiveness research analyses, imaging reports are available only in narrative clinical text and lack standardization in structure and format. To allow for efficient and rapid access to these data, we developed a Natural Language Processing (NLP) application to determine the diagnosis of bacterial pneumonia from pediatric diagnostic imaging reports by extracting pneumonia characteristics (ie, presence, symmetry, and size of pleural effusion and pulmonary infiltrate) [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      <p>NLP has been used to extract different types of clinical information from various sources of narrative text in adult patients [<xref ref-type="bibr" rid="ref12">12</xref>]. Studies have applied Bayesian networks and NLP to detect bacterial pneumonia in adults [<xref ref-type="bibr" rid="ref13">13</xref>], and several used an NLP application called MedLEE [<xref ref-type="bibr" rid="ref14">14</xref>] to extract community-acquired pneumonia severity scores in adults [<xref ref-type="bibr" rid="ref15">15</xref>] and pneumonia information from chest radiology reports in a neonatal intensive care unit [<xref ref-type="bibr" rid="ref16">16</xref>], or to identify patients with tuberculosis [<xref ref-type="bibr" rid="ref17">17</xref>]. Recent efforts applied NLP to extract pneumonia information from radiology reports in an adult intensive care unit [<xref ref-type="bibr" rid="ref18">18</xref>], detect probable pneumonia cases and help manual chart review [<xref ref-type="bibr" rid="ref19">19</xref>], and also included electronic health record structured data to detect pneumonia cases [<xref ref-type="bibr" rid="ref20">20</xref>]. These studies reported accuracy metrics with large variations, sensitivity ranging from .45 to .95, and positive predictive value (PPV) from .075 to .86 (best PPV was .86 with a sensitivity of .75 [<xref ref-type="bibr" rid="ref18">18</xref>], and best sensitivity was .95 with a PPV of .78 [<xref ref-type="bibr" rid="ref13">13</xref>]). They typically focused on only one type of clinical note, at only one health care organization or hospital, and included the complete development of large complex NLP systems. Only one of these prior studies included children evaluated for pneumonia [<xref ref-type="bibr" rid="ref19">19</xref>], but it required a manual review of a subset of the radiology reports already analyzed by the NLP system. 
A good recent review of NLP applications to radiology reports can be found in [<xref ref-type="bibr" rid="ref21">21</xref>]. The goal of this study was to develop an automated, scalable, and accurate method to determine the presence or absence of pneumonia in children, using a large variety of chest imaging reports from the newly developed PHIS+ repository in order to facilitate the conduct of adequately powered comparative effectiveness research on treatment options for hospitalized children.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Sites</title>
        <p>Six free-standing children’s hospitals were included: Boston Children’s Hospital (Boston, MA, USA); Children’s Hospital of Philadelphia (Philadelphia, PA, USA); Children’s Hospital of Pittsburgh (Pittsburgh, PA, USA); Cincinnati Children’s Hospital Medical Center (Cincinnati, OH, USA); Primary Children’s Hospital, Intermountain Healthcare (Salt Lake City, UT, USA); and Seattle Children’s Hospital (Seattle, WA, USA).</p>
      </sec>
      <sec>
        <title>Reference Standard Preparation</title>
        <p>The imaging procedures from the six contributing hospitals in the PHIS+ repository were already mapped to Current Procedural Terminology (CPT) codes [<xref ref-type="bibr" rid="ref22">22</xref>]. We first selected relevant chest diagnostic imaging (chest radiograph, computerized tomography, and ultrasound) procedure CPT codes (see <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>), and then extracted a stratified random collection of imaging study reports mapped to these CPT codes. One report was extracted for each randomly selected patient. A preliminary power analysis indicated that a selection of 270 imaging reports would allow a 95% CI of ±4% width with an expected sensitivity of 90%, assuming mention of pneumonia in 25% of the reports (pneumonia was the least frequently mentioned of the information types we extracted). A total of 282 reports were eventually selected, deidentified using De-ID software (DE-ID Data Corp) [<xref ref-type="bibr" rid="ref23">23</xref>], and provided as plain text files for NLP-based information extraction.</p>
      </sec>
      <sec>
        <title>Reference Standard Annotation</title>
        <p>The 282 deidentified diagnostic imaging reports were annotated by domain experts to evaluate the pneumonia information extraction application. Annotations included all mentions of pulmonary infiltrate, their local context (eg, negation, as in “no infiltrate”), and their symmetry (ie, unilateral or bilateral); pleural effusions, their local context, and their size (ie, small or moderate or large); mentions of pneumonia and their local context (eg, “consistent with pneumonia” or “no evidence of pneumonia”); and whether the report supported the diagnosis of bacterial pneumonia (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p>
        <p>The domain experts, three attending pediatric hospital medicine physicians, were trained while also iteratively refining the annotation instructions on the basis of their experience. They first annotated a set of 15 reports, with low interannotator agreement. Examples of disagreements between domain experts are listed in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p>
        <p>After having discussed disagreements and updated the annotation instructions, they annotated a second set of 10 other reports and reached fair agreement (pairwise proportions of agreement: .65-.78 for infiltrates, .12-.7 for effusions, and .43-.74 for mentions of pneumonia). Finally, after a final round of disagreement discussions and instructions refinement, they annotated 10 new reports and reached excellent agreement (.96-.98 for infiltrates, .94-1 for effusions, and .92-1 for mentions of pneumonia). The training phase then ended, and annotation of the complete 282 reports collection followed (including reannotation of the initial 15+10+10 reports). At this stage, the rare disagreements were discussed among all domain experts to reach consensus for the reference standard. The annotated information included the following (<xref ref-type="fig" rid="figure1">Figure 1</xref>; Final annotation guideline in <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>):</p>
        <list list-type="bullet">
          <list-item>
            <p>Mentions of “pneumonia” (or synonyms—eg, “pneumonitis”), without adjectives (except if required to define the concept; eg, “lung infection” needs “lung” to be precise enough).</p>
          </list-item>
          <list-item>
            <p>Mentions of “pleural effusion” (or synonyms—eg, “empyema”; or terms that imply the existence of a pleural effusion if “pleural effusion” or a synonym is not mentioned—eg, “loculation,” “free fluid”), without adjectives.</p>
          </list-item>
          <list-item>
            <p>Mentions of “pulmonary infiltrate” (or synonyms like “opacity,” “consolidation”), without adjectives or remote synonyms like “small airways disease,” “interstitial markings,” “peribronchial thickening,” or “atelectasis.”</p>
          </list-item>
          <list-item>
            <p>Context surrounding each pneumonia, effusion, or infiltrate annotation (referred to as “local context”) was annotated as <italic>present</italic> (ie, affirmed, not negated, current), <italic>absent</italic> (ie, negated, excluded), <italic>speculative</italic> (ie, hypothetical, a possibility, to rule it out), or <italic>historical</italic> (ie, in the past, not current anymore).</p>
          </list-item>
          <list-item>
            <p>Pleural effusion size was annotated as <italic>small</italic>, <italic>moderate-large</italic>, or <italic>not mentioned</italic>.</p>
          </list-item>
          <list-item>
            <p>Symmetry of infiltrates was annotated as <italic>unilateral</italic>, <italic>bilateral</italic>, or <italic>not mentioned</italic>.</p>
          </list-item>
          <list-item>
            <p>Overall, each report was annotated as to whether it did or did not generally support the diagnosis of bacterial pneumonia (true or false).</p>
          </list-item>
        </list>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Diagnostic imaging report annotations example.</p>
          </caption>
          <graphic xlink:href="jmir_v19i5e162_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Examples of domain expert annotation disagreements.</p>
          </caption>
          <graphic xlink:href="jmir_v19i5e162_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Clinical Information Extraction Application Development</title>
        <p>We developed an application based on NLP to automate the extraction of information. This application was based on the Apache UIMA (Unstructured Information Management Architecture) framework [<xref ref-type="bibr" rid="ref24">24</xref>] using components either developed specifically for this application or adapted from another NLP application: Textractor [<xref ref-type="bibr" rid="ref25">25</xref>]. Components included text preprocessing (sections detection, lists annotation, sentence segmentation, tokenization, part-of-speech tagging, and chunking), dictionary look-up, local context analysis, annotation attributes and patient information (hospital and patient code) extraction, machine learning features extraction, and the final classification (<xref ref-type="fig" rid="figure3">Figure 3</xref>).</p>
        <p>During text preprocessing, sections were detected using a collection of regular expressions representing possible headers for patient history sections. Lists were also detected using regular expressions, and their entries segmented as individual sentences. Segmentation of the text in sentences was adapted from Textractor, which is based on a machine learning algorithm (maximum entropy, MaxEnt [<xref ref-type="bibr" rid="ref26">26</xref>]). Sentences are then “tokenized,” split in words or other meaningful groups of alphabetical or numeric characters. Each token is then assigned a part-of-speech tag with another module adapted from Textractor that is based on maximum entropy (itself adapted from OpenNLP [<xref ref-type="bibr" rid="ref26">26</xref>]). Finally, noun phrase “chunks” are detected with a third module adapted from Textractor, which is also based on maximum entropy (also originally adapted from OpenNLP [<xref ref-type="bibr" rid="ref26">26</xref>]).</p>
        <p>The dictionary lookup module searches a list of terms for matches with the noun phrase “chunks” detected in the text. The list of terms (ie, dictionary) was originally based on a subset of the Unified Medical Language System (UMLS) Metathesaurus [<xref ref-type="bibr" rid="ref27">27</xref>] filtered by semantic type to include only disease or syndrome, finding, or pathologic function. This dictionary was later replaced with a list of terms built manually by clinicians (based on their domain knowledge), an approach that allowed for improved accuracy.</p>
        <p>The local context analysis was based on the ConText algorithm [<xref ref-type="bibr" rid="ref28">28</xref>], as implemented in Textractor. This algorithm looks for keywords that indicate local context such as negation (eg, denied, no, absent), and then assigns this context to concepts found in a window of words following or preceding the keyword. For example, in the sentence “Findings consistent with viral or reactive airways disease without focal pneumonia,” the keyword “without” indicates negation and precedes the annotated concept “pneumonia,” which will therefore be considered negated, or absent.</p>
        <p>The extraction of annotation attributes (effusion size and infiltrate symmetry) and patient information (hospital and patient code) was based on a set of regular expressions developed specifically and implemented similarly to ConText, assigning these attributes to the appropriate annotated concepts.</p>
        <p>Finally, the classification of reports as supporting the diagnosis of bacterial pneumonia (or not) was based on a Support Vector Machine (SVM) classifier with lexical and semantic features. These features included a “bag-of-words” (ie, list of words occurring more than once in our reports collection, without stopwords like “and,” “from,” “each”) and the annotated concepts with their attributes (eg, “pleural effusion” annotation with “small” quantity attribute). The classifier was an implementation of LIBSVM [<xref ref-type="bibr" rid="ref29">29</xref>], with the radial basis function (RBF) kernel.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Components of the pneumonia clinical information extraction application.</p>
          </caption>
          <graphic xlink:href="jmir_v19i5e162_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Application Performance Improvements</title>
        <p>When initially evaluating the pneumonia classification accuracy, sensitivity was not satisfactory. Therefore, we compared several different machine learning algorithms, refined parameters for the SVM, and filtered the machine learning features (bag-of-words), as well as the dictionaries used by our application.</p>
        <p>Machine learning algorithms compared included decision trees, rule learners, naïve Bayes, Bayesian networks, and SVMs, all implemented in the Weka software (version 3.7; University of Waikato, New Zealand) [<xref ref-type="bibr" rid="ref30">30</xref>]. Features used were the same with each algorithm and included the annotated concepts, and their attributes and local context. Refining the SVM parameters (ie, the penalty parameter <italic>C</italic>, and the radial basis function parameter gamma; final values allowing for best accuracy: <italic>C</italic>=11.5, gamma=.1) consisted of performing a grid search to select the best values of these parameters (using the Grid Parameter Search tool available with LIBSVM).</p>
        <p>The “bag-of-words” is an important set of features for machine learning, and the original version included 2103 different words. Even after excluding stopwords, most remaining words have no meaning associated with the diagnosis or radiological signs of pneumonia. To focus our classification on more meaningful words for our task, we manually reviewed all words in the initial bag-of-words (named BOW0) and created three versions with increasing levels of domain specificity. The first refined bag-of-words (BOW1) included 99 words, the second (more specific) bag-of-words (BOW2) included 37 words, and the third (most specific) bag-of-words (BOW3) included only 23 words. The three refined bag-of-words are listed in <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>. All were annotated as unigrams.</p>
        <p>Finally, refining our dictionary of terms focused on mentions of pulmonary infiltrate, removing terms that caused many false positive matches, but few correct matches.</p>
      </sec>
      <sec>
        <title>Performance Evaluation Approach</title>
        <p>We used a cross validation approach with 5 “folds” for training and validation. This approach starts with a random partition of our collection of 282 notes into 5 subsets of approximately the same size. Then, one subset is retained for testing and the remaining four subsets are used for training. This process is repeated 5 times (ie, “folds”), with each subset used only once for testing. In each “fold,” we compared the information extraction application output with the manual reference standard annotations, and classified each annotation as true positive (application output matches the reference standard), false positive (application output not found in the reference standard), or false negative (reference standard annotation missed by the application). We also counted true negatives for the overall classification when the reference standard and the application both classified the report as not supporting the diagnosis of bacterial pneumonia. Finally, we used counts of true positives, true negatives, false positives, and false negatives, and computed various accuracy metrics at the end of the whole process (not after each fold and then averaged across folds). Accuracy metrics included sensitivity (ie, recall), positive predictive value (ie, precision), the <italic>F</italic><sub>1</sub>-measure (a harmonic mean of sensitivity and positive predictive value [<xref ref-type="bibr" rid="ref31">31</xref>]), and the accuracy (proportion of agreement) of the local context category and the attributes category (effusion size and infiltrate symmetry).</p>
        <p>For the concept-level evaluation, application automatic annotations and reference standard manual annotations were compared and considered a match when the annotated text overlapped exactly (except preceding or following white space or punctuation) and the annotated information categories (eg, “Effusion”) were the same. For the document-level evaluation, reports were classified as supporting the diagnosis of bacterial pneumonia or not. They were considered a match when their binary classification corresponded to the reference standard classification. For document-level evaluation of domain experts, their initial classifications (ie, before adjudication of differences between annotators and reference standard development) were compared with the final reference standard classifications.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Reference Standard Development</title>
        <p>The 282 annotated radiology imaging reports originated from each of the 6 health care organizations in approximately the same numbers (48 from the Boston Children’s Hospital, 48 from the Children’s Hospital of Philadelphia, 47 from the Children’s Hospital of Pittsburgh, 48 from the Cincinnati Children’s Hospital Medical Center, 47 from the Primary Children’s Hospital, and 44 from the Seattle Children’s Hospital). Annotations included 72 mentions of pneumonia or synonyms (0.255 per report on average), 312 mentions of pulmonary infiltrate or synonyms (1.106), and 369 mentions of pleural effusion or synonyms (1.309). Among the 282 reports, 24.5% (69/282) supported the diagnosis of bacterial pneumonia. Agreement among annotators for the 247 imaging reports not previously seen (282 minus the 35 reports used for annotator training) reached 82 of 121 pneumonia mentions (67.8%), 502 of 610 infiltrate mentions (82.3%), and 526 of 670 effusion mentions (78.5%).</p>
      </sec>
      <sec>
        <title>Performance at the Concept Level</title>
        <p>Concepts evaluated here included the automatic annotations by our application of mentions of pneumonia, pleural effusion, pulmonary infiltrate, and corresponding local context and attributes. The average sensitivity and positive predictive value were approximately 93-94%, with higher accuracy for mentions of pneumonia, and lower accuracy for mentions of pleural effusion (<xref ref-type="table" rid="table1">Table 1</xref>). The local context was correct in about 92% (65/71) to 94.1% (272/289) of the cases, and the attribute category in about 72.3% (209/289) to 92.5% (321/347) of the cases.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Concept level accuracy evaluation results.</p>
          </caption>
          <table width="500" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="205"/>
            <col width="75"/>
            <col width="75"/>
            <col width="75"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Metrics</td>
                <td colspan="4">Terms mentioned in radiology imaging reports</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Pneumonia</td>
                <td>Infiltrate</td>
                <td>Effusion</td>
                <td>All included terms</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>True positives</td>
                <td>71</td>
                <td>289</td>
                <td>347</td>
                <td>707</td>
              </tr>
              <tr valign="top">
                <td>False positives</td>
                <td>0</td>
                <td>20</td>
                <td>37</td>
                <td>57</td>
              </tr>
              <tr valign="top">
                <td>False negatives</td>
                <td>1</td>
                <td>23</td>
                <td>22</td>
                <td>46</td>
              </tr>
              <tr valign="top">
                <td>Sensitivity</td>
                <td>.986</td>
                <td>.926</td>
                <td>.940</td>
                <td>.939</td>
              </tr>
              <tr valign="top">
                <td>Positive predictive value</td>
                <td>1.000</td>
                <td>.935</td>
                <td>.904</td>
                <td>.925</td>
              </tr>
              <tr valign="top">
                <td><italic>F</italic><sub>1</sub>-measure<sup>a</sup></td>
                <td>.993</td>
                <td>.931</td>
                <td>.922</td>
                <td>.932</td>
              </tr>
              <tr valign="top">
                <td>Context accuracy</td>
                <td>.916</td>
                <td>.941</td>
                <td>.931</td>
                <td>.929</td>
              </tr>
              <tr valign="top">
                <td>Attribute accuracy</td>
                <td>N/A<sup>b</sup></td>
                <td>.723</td>
                <td>.925</td>
                <td>.824</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup><italic>F</italic><sub>1</sub>-measure is a harmonic mean of sensitivity and positive predictive value [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Performance at the Document Level</title>
        <p>This classification was evaluated with various configurations of our application. Sensitivity was quite low (.42) with our initial configuration (<xref ref-type="table" rid="table2">Table 2</xref>), motivating us to experiment with the aforementioned performance improvement approaches.</p>
        <p>When using the SVM classifier with all features (ie, concepts with local context and attributes, and bag-of-words), the more specific bag-of-words (BOW2 and BOW3) allowed for higher positive predictive value and specificity, but sensitivity was the highest at .652 with the least filtered bag-of-words (BOW1). The configuration allowing for the highest sensitivity and <italic>F</italic><sub>1</sub>-measure was based on the least filtered bag-of-words and a refined dictionary (Best system in <xref ref-type="table" rid="table2">Table 2</xref>).</p>

        <p>We also compared different machine learning algorithms with a limited set of features (ie, no bag-of-words as not all algorithms tested could use it). Most of them allowed for higher sensitivity than the SVM algorithm (as implemented in Weka sequential minimal optimization [SMO] [<xref ref-type="bibr" rid="ref32">32</xref>]), but their positive predictive value was always lower (see <xref ref-type="app" rid="app4">Multimedia Appendix 4</xref>).</p>

        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Document-level classification results.</p>
          </caption>
          <table width="500" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="115"/>
            <col width="50"/>
            <col width="50"/>
            <col width="50"/>
            <col width="75"/>
            <col width="75"/>
            <col width="85"/>
            <thead>
              <tr valign="top">
                <td>Metrics</td>
                <td>BOW0<sup>a</sup></td>
                <td>BOW1<sup>b</sup></td>
                <td>BOW2<sup>c</sup></td>
                <td>BOW3<sup>d</sup></td>
                <td>Best system<sup>e</sup><break/>(95% CI)</td>
                <td>Domain experts average</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>True positives</td>
                <td>29</td>
                <td>45</td>
                <td>31</td>
                <td>30</td>
                <td>49</td>
                <td>36</td>
              </tr>
              <tr valign="top">
                <td>True negatives</td>
                <td>207</td>
                <td>200</td>
                <td>210</td>
                <td>209</td>
                <td>205</td>
                <td>206</td>
              </tr>
              <tr valign="top">
                <td>False positives</td>
                <td>6</td>
                <td>13</td>
                <td>3</td>
                <td>4</td>
                <td>8</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>False negatives</td>
                <td>40</td>
                <td>24</td>
                <td>38</td>
                <td>39</td>
                <td>20</td>
                <td>33</td>
              </tr>
              <tr valign="top">
                <td>Sensitivity</td>
                <td>.420</td>
                <td>.652</td>
                <td>.449</td>
                <td>.435</td>
                <td>.710 <break/>(.683-.737)</td>
                <td>.527</td>
              </tr>
              <tr valign="top">
                <td>Positive predictive value</td>
                <td>.829</td>
                <td>.776</td>
                <td>.912</td>
                <td>.882</td>
                <td>.860 <break/>(.833-.886)</td>
                <td>.848</td>
              </tr>
              <tr valign="top">
                <td><italic>F</italic><sub>1</sub>-measure</td>
                <td>.556</td>
                <td>.709</td>
                <td>.602</td>
                <td>.583</td>
                <td>.778</td>
                <td>.650</td>
              </tr>
              <tr valign="top">
                <td>Specificity</td>
                <td>.972</td>
                <td>.939</td>
                <td>.986</td>
                <td>.981</td>
                <td>.962 <break/>(.951-.974)</td>
                <td>.966</td>
              </tr>
              <tr valign="top">
                <td>Accuracy</td>
                <td>.837</td>
                <td>.869</td>
                <td>.855</td>
                <td>.847</td>
                <td>.901 <break/>(.883-.918)</td>
                <td>.862</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>BOW0: Initial bag-of-words.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>BOW1: First refined bag-of-words.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>BOW2: Second (more specific) refined bag-of-words.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>BOW3: Third (most specific) refined bag-of-words.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>BOW1 with refined dictionary.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>

        <p>The decision tree algorithm (pruned C4.5 decision tree [<xref ref-type="bibr" rid="ref33">33</xref>]) automatically created the decision tree and allowed for a classification <italic>F</italic><sub>1</sub>-measure of .552 (<xref ref-type="fig" rid="figure4">Figure 4</xref>).</p>
        <p>The rule learner (Repeated Incremental Pruning to Produce Error Reduction [RIPPER] [<xref ref-type="bibr" rid="ref34">34</xref>]) automatically learned three rules that allowed for a classification <italic>F</italic><sub>1</sub>-measure of .613:</p>
        <list list-type="bullet">
          <list-item>
            <p>IF (Effusion=Present) AND (Symmetry=Unilateral) THEN Supports pneumonia=Yes</p>
          </list-item>
          <list-item>
            <p>IF (Infiltrate=Present) AND (Pneumonia mention=Present) THEN Supports pneumonia=Yes</p>
          </list-item>
          <list-item>
            <p>OTHERWISE Supports pneumonia=No</p>
          </list-item>
        </list>
        <p>The Naïve Bayes algorithm implemented in Weka is based on the John and Langley algorithm [<xref ref-type="bibr" rid="ref35">35</xref>] and the Bayesian network implementation is based on several different algorithms such as the Cooper K2 algorithm [<xref ref-type="bibr" rid="ref36">36</xref>]. The Bayesian network allowed for the highest sensitivity (.739).</p>
        <p>In Weka, the SVM implements John Platt's sequential minimal optimization (SMO) algorithm [<xref ref-type="bibr" rid="ref32">32</xref>]. In our experiment, where the bag-of-words was not part of the features used here, it reached the highest positive predictive value (.811), but also had low sensitivity.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Pruned decision tree for pneumonia classification.</p>
          </caption>
          <graphic xlink:href="jmir_v19i5e162_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>The most common errors our application made were false negatives, erroneously classifying reports as not supporting the diagnosis of bacterial pneumonia when they actually did support it. Among the 20 false negatives, most were cases of pneumonia that were not as clear, with only 48% of the expert annotators originally agreeing that they were positive cases. This average agreement was 86% for cases that were correctly classified. Most false negatives had no pleural effusion and some had infiltrates mentioned as “airspace disease,” which domain experts specifically decided to exclude as a clear indicator of bacterial pneumonia. Others had pleural effusions mentioned as “fluid” (without the mention of “pleural”), which were difficult to differentiate from other fluid locations in the thorax.</p>
        <p>False positive errors (ie, erroneously classifying reports as supporting the diagnosis of bacterial pneumonia when they actually did not support it) were rarer, often caused by local context analysis errors (eg, “pleural effusion has completely resolved” not recognized as an absence of pleural effusion).</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings and Comparison With Prior Work</title>
        <p>The most accurate version of our NLP-based pneumonia information extraction application performed better than human domain experts, with significantly higher sensitivity (Fisher exact test, <italic>P</italic>=.04).</p>
        <p>We found variation in the language used in chest imaging reports both within and across the six children’s hospitals. This was due to inherent differences in imaging modalities, radiologists reporting, and hospital practice. Despite this variability in language, the most accurate version of our NLP-based diagnostic imaging reports classification application eventually reached a sensitivity of .71, positive predictive value of .86, and a specificity of .96. It was based on an SVM classifier with a refined set of features that included a filtered bag-of-words of 99 words, and the annotated concepts with their attributes. When tested in its first version, it only reached a sensitivity of .42.</p>
        <p>Experiments to improve classification accuracy included refining the features and parameters used by the SVM classifier, and testing other algorithms. These algorithms included decision trees, rule learners, naïve Bayes, Bayesian networks, and SVMs. They allowed for sensitivity between .42 and .74, positive predictive value between .66 and .81, and specificity between .88 and .97. Even if the Bayesian network reached a slightly higher sensitivity than the most accurate version of our classifier (.739 vs .71), its positive predictive value was significantly lower (.78 vs .86), and the overall accuracy and <italic>F</italic><sub>1</sub>-measure were therefore lower. These metrics are consistent with or significantly better than earlier studies such as the extraction of pneumonia information from chest radiology reports in a neonatal intensive care unit by Mendonça and colleagues [<xref ref-type="bibr" rid="ref16">16</xref>], who reported .71 sensitivity but only .075 positive predictive value, or the extraction of pneumonia findings from chest radiology reports by Fiszman and colleagues [<xref ref-type="bibr" rid="ref37">37</xref>], who reported .90 positive predictive value but only .34 sensitivity.</p>
        <p>The performance reached by the most accurate version of our NLP-based reports classification application may seem low when considering the classification task it performed (ie, classifying diagnostic imaging reports as supporting the diagnosis of bacterial pneumonia or not), but this task was actually more difficult than it may appear. When comparing the three domain experts (ie, attending physicians) annotating these reports with the final reference standard, their average sensitivity was lower than the automatic classifier (<xref ref-type="table" rid="table2">Table 2</xref>). The positive predictive value and specificity were comparable. This comparison demonstrates the difficulty of the classification task, and the excellent performance of our application when compared with human experts.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our evaluation had several limitations. First, although we had a small sample of annotated diagnostic imaging reports, this sample size allowed for CIs between .023 and .054 only (95% CI; <xref ref-type="table" rid="table2">Table 2</xref>). This pilot study only included imaging reports from 282 patients, but allowed for sufficiently precise assessment of the accuracy of our system to then apply it to a much larger population of more than 10,000 patients. Comparing our approach with domain experts would benefit from increased precision and could be based on an additional evaluation based on a new larger testing set. Next, the 5-fold cross-validation approach we used only yields meaningful results if the testing set and training set are drawn from the same population, which was our case (both were randomly drawn from our collection of diagnostic imaging reports). Cross-validation could also be misused if selecting features using the complete dataset, and using some data for both training and testing. We avoided both problems by selecting features manually (without examining the dataset, using only the experts’ domain knowledge), and by ensuring that each report was used exactly once for testing in our cross-validation approach. The BOW refinement process was purely manual and based on clinical domain knowledge, an approach that would not generalize easily to other applications. Finally, this pilot study was realized on a subset of clinical notes from a unique small population in 6 health care organizations, so additional adaptations may be required to generalize to a larger population (eg, retraining the machine learning algorithms, refining the dictionaries used).</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We developed and used an NLP-based information extraction application to generate discrete and accurate data to identify pediatric patients with CAP. Our main objective was good positive predictive value and improved sensitivity when compared with human domain experts. The pneumonia information extraction application used methods and resources that were trained and evaluated with our reports collection, using a 5-fold cross-validation approach. It allowed for classifying pediatric diagnostic imaging reports with a higher accuracy than that by human domain experts (ie, higher sensitivity and similar positive predictive value and specificity) in this pilot study. After this study, it was used to extract information and classify a much larger collection of diagnostic imaging reports (more than 10,000) in the PHIS+ database, for subsequent community-acquired pneumonia research comparing the effectiveness of different treatment options.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>Current Procedural Terminology Codes used to select relevant imaging studies.</p>
        <media xlink:href="jmir_v19i5e162_app1.pdf" xlink:title="PDF File (Adobe PDF File), 213KB"/>
      </app>
      <app id="app2">
        <title>Multimedia Appendix 2</title>
        <p>Annotation guideline.</p>
        <media xlink:href="jmir_v19i5e162_app2.pdf" xlink:title="PDF File (Adobe PDF File), 49KB"/>
      </app>
      <app id="app3">
        <title>Multimedia Appendix 3</title>
        <p>Refined bag-of-words.</p>
        <media xlink:href="jmir_v19i5e162_app3.pdf" xlink:title="PDF File (Adobe PDF File), 220KB"/>
      </app>
      <app id="app4">
        <title>Multimedia Appendix 4</title>
        <p>Document level classification accuracy with different machine learning algorithms.</p>
        <media xlink:href="jmir_v19i5e162_app4.pdf" xlink:title="PDF File (Adobe PDF File), 37KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BOW</term>
          <def>
            <p>bag-of-words</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CAP</term>
          <def>
            <p>community-acquired pneumonia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CER</term>
          <def>
            <p>comparative effectiveness research</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CPT</term>
          <def>
            <p>Current Procedural Terminology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ICD-9-CM</term>
          <def>
            <p>International Classification of Diseases, 9th revision, Clinical Modification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NLP</term>
          <def>
            <p>Natural Language Processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PHIS+</term>
          <def>
            <p>Pediatric Health Information System, augmented</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">RBF</term>
          <def>
            <p>radial basis function</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">SVM</term>
          <def>
            <p>Support Vector Machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">UIMA</term>
          <def>
            <p>Unstructured Information Management Architecture</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was approved by the Institutional Review Board of the Children's Hospital of Philadelphia (CHOP), as the primary recipient of the PHIS+ grant funding. A business associates’ agreement was used between each hospital and the Children’s Hospital Association to authorize sharing of data with identifiers, and a data use agreement governed the sharing of deidentified hospital clinical data. This project was funded under grant number R01 HS019862 from the AHRQ. We thank Ron Keren, MD, MPH, for his advice and leadership of the PHIS+ project. We also thank the Pediatric Research in Inpatient Settings (PRIS) Research Network (www.prisnetwork.org).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>SMM conceived the NLP system and led its development. This work was done while he was part of the University of Utah Biomedical Informatics Department. RG was responsible for the data access, preparation, and analysis. JST, JMS, RS, and SSS offered their clinical domain expertise. JST, JMS, and SSS annotated the reference standard. SSS was responsible for the clinical project and evaluation. SMM drafted the initial manuscript. RG, JST, JMS, RS, and SSS provided critical revision of the manuscript. All authors gave the final approval of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>GE</given-names>
          </name>
          <name name-style="western">
            <surname>Lorch</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Sheffler-Collins</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Kronman</surname>
            <given-names>MP</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>SS</given-names>
          </name>
        </person-group>
        <article-title>National hospitalization trends for pediatric pneumonia and associated complications</article-title>
        <source>Pediatrics</source>  
        <year>2010</year>  
        <month>08</month>  
        <volume>126</volume>  
        <issue>2</issue>  
        <fpage>204</fpage>  
        <lpage>13</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20643717"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1542/peds.2009-3109</pub-id>
        <pub-id pub-id-type="medline">20643717</pub-id>
        <pub-id pub-id-type="pii">peds.2009-3109</pub-id>
        <pub-id pub-id-type="pmcid">PMC2914815</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Keren</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Luan</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Localio</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Hall</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>McLeod</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Dai</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Srivastava</surname>
            <given-names>R</given-names>
          </name>
          <collab>Pediatric Research in Inpatient Settings (PRIS) Network</collab>
        </person-group>
        <article-title>Prioritization of comparative effectiveness research topics in hospital pediatrics</article-title>
        <source>Arch Pediatr Adolesc Med</source>  
        <year>2012</year>  
        <month>12</month>  
        <volume>166</volume>  
        <issue>12</issue>  
        <fpage>1155</fpage>  
        <lpage>64</lpage>  
        <pub-id pub-id-type="doi">10.1001/archpediatrics.2012.1266</pub-id>
        <pub-id pub-id-type="medline">23027409</pub-id>
        <pub-id pub-id-type="pii">1363507</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bradley</surname>
            <given-names>JS</given-names>
          </name>
          <name name-style="western">
            <surname>Byington</surname>
            <given-names>CL</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>SS</given-names>
          </name>
          <name name-style="western">
            <surname>Alverson</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Carter</surname>
            <given-names>ER</given-names>
          </name>
          <name name-style="western">
            <surname>Harrison</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Kaplan</surname>
            <given-names>SL</given-names>
          </name>
          <name name-style="western">
            <surname>Mace</surname>
            <given-names>SE</given-names>
          </name>
          <name name-style="western">
            <surname>McCracken</surname>
            <given-names>GH</given-names>
          </name>
          <name name-style="western">
            <surname>Moore</surname>
            <given-names>MR</given-names>
          </name>
          <name name-style="western">
            <surname>St Peter</surname>
            <given-names>SD</given-names>
          </name>
          <name name-style="western">
            <surname>Stockwell</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Swanson</surname>
            <given-names>JT</given-names>
          </name>
          <collab>Pediatric Infectious Diseases Society and the Infectious Diseases Society of America</collab>
        </person-group>
        <article-title>The management of community-acquired pneumonia in infants and children older than 3 months of age: clinical practice guidelines by the Pediatric Infectious Diseases Society and the Infectious Diseases Society of America</article-title>
        <source>Clin Infect Dis</source>  
        <year>2011</year>  
        <month>10</month>  
        <volume>53</volume>  
        <issue>7</issue>  
        <fpage>e25</fpage>  
        <lpage>76</lpage>  
        <pub-id pub-id-type="doi">10.1093/cid/cir531</pub-id>
        <pub-id pub-id-type="medline">21880587</pub-id>
        <pub-id pub-id-type="pii">cir531</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ambroggio</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Taylor</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Tabb</surname>
            <given-names>LP</given-names>
          </name>
          <name name-style="western">
            <surname>Newschaffer</surname>
            <given-names>CJ</given-names>
          </name>
          <name name-style="western">
            <surname>Evans</surname>
            <given-names>AA</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>SS</given-names>
          </name>
        </person-group>
        <article-title>Comparative effectiveness of empiric β-lactam monotherapy and β-lactam-macrolide combination therapy in children hospitalized with community-acquired pneumonia</article-title>
        <source>J Pediatr</source>  
        <year>2012</year>  
        <month>12</month>  
        <volume>161</volume>  
        <issue>6</issue>  
        <fpage>1097</fpage>  
        <lpage>103</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.jpeds.2012.06.067</pub-id>
        <pub-id pub-id-type="medline">22901738</pub-id>
        <pub-id pub-id-type="pii">S0022-3476(12)00756-1</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Williams</surname>
            <given-names>DJ</given-names>
          </name>
          <name name-style="western">
            <surname>Hall</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>SS</given-names>
          </name>
          <name name-style="western">
            <surname>Parikh</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Tyler</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Neuman</surname>
            <given-names>MI</given-names>
          </name>
          <name name-style="western">
            <surname>Hersh</surname>
            <given-names>AL</given-names>
          </name>
          <name name-style="western">
            <surname>Brogan</surname>
            <given-names>TV</given-names>
          </name>
          <name name-style="western">
            <surname>Blaschke</surname>
            <given-names>AJ</given-names>
          </name>
          <name name-style="western">
            <surname>Grijalva</surname>
            <given-names>CG</given-names>
          </name>
        </person-group>
        <article-title>Narrow vs broad-spectrum antimicrobial therapy for children hospitalized with pneumonia</article-title>
        <source>Pediatrics</source>  
        <year>2013</year>  
        <month>11</month>  
        <volume>132</volume>  
        <issue>5</issue>  
        <fpage>e1141</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://pediatrics.aappublications.org/cgi/pmidlookup?view=long&#38;pmid=24167170"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1542/peds.2013-1614</pub-id>
        <pub-id pub-id-type="medline">24167170</pub-id>
        <pub-id pub-id-type="pii">peds.2013-1614</pub-id>
        <pub-id pub-id-type="pmcid">PMC4530302</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kaafarani</surname>
            <given-names>HM</given-names>
          </name>
          <name name-style="western">
            <surname>Rosen</surname>
            <given-names>AK</given-names>
          </name>
        </person-group>
        <article-title>Using administrative data to identify surgical adverse events: an introduction to the Patient Safety Indicators</article-title>
        <source>Am J Surg</source>  
        <year>2009</year>  
        <month>11</month>  
        <volume>198</volume>  
        <issue>5 Suppl</issue>  
        <fpage>S63</fpage>  
        <lpage>8</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.amjsurg.2009.08.008</pub-id>
        <pub-id pub-id-type="medline">19874937</pub-id>
        <pub-id pub-id-type="pii">S0002-9610(09)00471-1</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Williams</surname>
            <given-names>DJ</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>SS</given-names>
          </name>
          <name name-style="western">
            <surname>Myers</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hall</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Auger</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Queen</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Jerardi</surname>
            <given-names>KE</given-names>
          </name>
          <name name-style="western">
            <surname>McClain</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Wiggleton</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Tieder</surname>
            <given-names>JS</given-names>
          </name>
        </person-group>
        <article-title>Identifying pediatric community-acquired pneumonia hospitalizations: accuracy of administrative billing codes</article-title>
        <source>JAMA Pediatr</source>  
        <year>2013</year>  
        <month>09</month>  
        <volume>167</volume>  
        <issue>9</issue>  
        <fpage>851</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23896966"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1001/jamapediatrics.2013.186</pub-id>
        <pub-id pub-id-type="medline">23896966</pub-id>
        <pub-id pub-id-type="pii">1720225</pub-id>
        <pub-id pub-id-type="pmcid">PMC3907952</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>McClain</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Hall</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>SS</given-names>
          </name>
          <name name-style="western">
            <surname>Tieder</surname>
            <given-names>JS</given-names>
          </name>
          <name name-style="western">
            <surname>Myers</surname>
            <given-names>AL</given-names>
          </name>
          <name name-style="western">
            <surname>Auger</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Statile</surname>
            <given-names>AM</given-names>
          </name>
          <name name-style="western">
            <surname>Jerardi</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Queen</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Fieldston</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Williams</surname>
            <given-names>DJ</given-names>
          </name>
        </person-group>
        <article-title>Admission chest radiographs predict illness severity for children hospitalized with pneumonia</article-title>
        <source>J Hosp Med</source>  
        <year>2014</year>  
        <month>09</month>  
        <volume>9</volume>  
        <issue>9</issue>  
        <fpage>559</fpage>  
        <lpage>64</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24942619"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1002/jhm.2227</pub-id>
        <pub-id pub-id-type="medline">24942619</pub-id>
        <pub-id pub-id-type="pmcid">PMC4154996</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Narus</surname>
            <given-names>SP</given-names>
          </name>
          <name name-style="western">
            <surname>Srivastava</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Gouripeddi</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Livne</surname>
            <given-names>OE</given-names>
          </name>
          <name name-style="western">
            <surname>Mo</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Bickel</surname>
            <given-names>JP</given-names>
          </name>
          <name name-style="western">
            <surname>de Regt</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Hales</surname>
            <given-names>JW</given-names>
          </name>
          <name name-style="western">
            <surname>Kirkendall</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Stepanek</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Toth</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Keren</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Federating clinical data from six pediatric hospitals: process and initial results from the PHIS+ Consortium</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2011</year>  
        <volume>2011</volume>  
        <fpage>994</fpage>  
        <lpage>1003</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22195159"/>
        </comment>  
        <pub-id pub-id-type="medline">22195159</pub-id>
        <pub-id pub-id-type="pmcid">PMC3243196</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gouripeddi</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Warner</surname>
            <given-names>PB</given-names>
          </name>
          <name name-style="western">
            <surname>Mo</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Levin</surname>
            <given-names>JE</given-names>
          </name>
          <name name-style="western">
            <surname>Srivastava</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>SS</given-names>
          </name>
          <name name-style="western">
            <surname>de Regt</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Kirkendall</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Bickel</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Korgenski</surname>
            <given-names>EK</given-names>
          </name>
          <name name-style="western">
            <surname>Precourt</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Stepanek</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Mitchell</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Narus</surname>
            <given-names>SP</given-names>
          </name>
          <name name-style="western">
            <surname>Keren</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Federating clinical data from six pediatric hospitals: process and initial results for microbiology from the PHIS+ consortium</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2012</year>  
        <volume>2012</volume>  
        <fpage>281</fpage>  
        <lpage>90</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23304298"/>
        </comment>  
        <pub-id pub-id-type="medline">23304298</pub-id>
        <pub-id pub-id-type="pmcid">PMC3540481</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Meystre</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Gouripeddi</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Mitchell</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Automatic pediatric pneumonia characteristics extraction from diagnostic imaging reports in a multi-institutional clinical repository</article-title>
        <year>2013</year>  
        <conf-name>2013 Joint Summits on Translational Science</conf-name>
        <conf-date>March 18-22, 2013</conf-date>
        <conf-loc>San Francisco</conf-loc></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Meystre</surname>
            <given-names>SM</given-names>
          </name>
          <name name-style="western">
            <surname>Savova</surname>
            <given-names>GK</given-names>
          </name>
          <name name-style="western">
            <surname>Kipper-Schuler</surname>
            <given-names>KC</given-names>
          </name>
          <name name-style="western">
            <surname>Hurdle</surname>
            <given-names>JF</given-names>
          </name>
        </person-group>
        <article-title>Extracting information from textual documents in the electronic health record: a review of recent research</article-title>
        <source>Yearb Med Inform</source>  
        <year>2008</year>  
        <fpage>128</fpage>  
        <lpage>44</lpage>  
        <pub-id pub-id-type="medline">18660887</pub-id>
        <pub-id pub-id-type="pii">me08010128</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Wilbur</surname>
            <given-names>WJ</given-names>
          </name>
        </person-group>
        <article-title>Corpus-based statistical screening for phrase identification</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2000</year>  
        <volume>7</volume>  
        <issue>5</issue>  
        <fpage>499</fpage>  
        <lpage>511</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=10984469"/>
        </comment>  
        <pub-id pub-id-type="medline">10984469</pub-id>
        <pub-id pub-id-type="pmcid">PMC79045</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Friedman</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>A broad-coverage natural language processing system</article-title>
        <source>Proc AMIA Symp</source>  
        <year>2000</year>  
        <fpage>270</fpage>  
        <lpage>4</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/11079887"/>
        </comment>  
        <pub-id pub-id-type="medline">11079887</pub-id>
        <pub-id pub-id-type="pii">D200144</pub-id>
        <pub-id pub-id-type="pmcid">PMC2243979</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>WW</given-names>
          </name>
          <name name-style="western">
            <surname>Haug</surname>
            <given-names>PJ</given-names>
          </name>
        </person-group>
        <article-title>Comparing expert systems for identifying chest x-ray reports that support pneumonia</article-title>
        <source>Proc AMIA Symp</source>  
        <year>1999</year>  
        <fpage>216</fpage>  
        <lpage>20</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/10566352"/>
        </comment>  
        <pub-id pub-id-type="medline">10566352</pub-id>
        <pub-id pub-id-type="pii">D005602</pub-id>
        <pub-id pub-id-type="pmcid">PMC2232555</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mendonça</surname>
            <given-names>EA</given-names>
          </name>
          <name name-style="western">
            <surname>Haas</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Shagina</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Larson</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Friedman</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Extracting information on pneumonia in infants using natural language processing of radiology reports</article-title>
        <source>J Biomed Inform</source>  
        <year>2005</year>  
        <month>08</month>  
        <volume>38</volume>  
        <issue>4</issue>  
        <fpage>314</fpage>  
        <lpage>21</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(05)00016-X"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2005.02.003</pub-id>
        <pub-id pub-id-type="medline">16084473</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(05)00016-X</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Johnson</surname>
            <given-names>SB</given-names>
          </name>
          <name name-style="western">
            <surname>Friedman</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Integrating data from natural language processing into a clinical information system</article-title>
        <source>Proc AMIA Annu Fall Symp</source>  
        <year>1996</year>  
        <fpage>537</fpage>  
        <lpage>41</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/8947724"/>
        </comment>  
        <pub-id pub-id-type="medline">8947724</pub-id>
        <pub-id pub-id-type="pmcid">PMC2233157</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Clark</surname>
            <given-names>MP</given-names>
          </name>
          <name name-style="western">
            <surname>Mendoza</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Saket</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Gardner</surname>
            <given-names>MN</given-names>
          </name>
          <name name-style="western">
            <surname>Turk</surname>
            <given-names>BJ</given-names>
          </name>
          <name name-style="western">
            <surname>Escobar</surname>
            <given-names>GJ</given-names>
          </name>
        </person-group>
        <article-title>Automated identification of pneumonia in chest radiograph reports in critically ill patients</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2013</year>  
        <month>08</month>  
        <day>15</day>  
        <volume>13</volume>  
        <fpage>90</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-13-90"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6947-13-90</pub-id>
        <pub-id pub-id-type="medline">23947340</pub-id>
        <pub-id pub-id-type="pii">1472-6947-13-90</pub-id>
        <pub-id pub-id-type="pmcid">PMC3765332</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dublin</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Baldwin</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Walker</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Christensen</surname>
            <given-names>LM</given-names>
          </name>
          <name name-style="western">
            <surname>Haug</surname>
            <given-names>PJ</given-names>
          </name>
          <name name-style="western">
            <surname>Jackson</surname>
            <given-names>ML</given-names>
          </name>
          <name name-style="western">
            <surname>Nelson</surname>
            <given-names>JC</given-names>
          </name>
          <name name-style="western">
            <surname>Ferraro</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Carrell</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>WW</given-names>
          </name>
        </person-group>
        <article-title>Natural Language Processing to identify pneumonia from radiology reports</article-title>
        <source>Pharmacoepidemiol Drug Saf</source>  
        <year>2013</year>  
        <month>08</month>  
        <volume>22</volume>  
        <issue>8</issue>  
        <fpage>834</fpage>  
        <lpage>41</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23554109"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1002/pds.3418</pub-id>
        <pub-id pub-id-type="medline">23554109</pub-id>
        <pub-id pub-id-type="pmcid">PMC3811072</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>DeLisle</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Deepak</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Siddiqui</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Gundlapalli</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Samore</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>D'Avolio</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Using the electronic medical record to identify community-acquired pneumonia: toward a replicable automated strategy</article-title>
        <source>PLoS One</source>  
        <year>2013</year>  
        <volume>8</volume>  
        <issue>8</issue>  
        <fpage>e70944</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0070944"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0070944</pub-id>
        <pub-id pub-id-type="medline">23967138</pub-id>
        <pub-id pub-id-type="pii">PONE-D-13-18611</pub-id>
        <pub-id pub-id-type="pmcid">PMC3742728</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cai</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Giannopoulos</surname>
            <given-names>AA</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Kelil</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Ripley</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Kumamaru</surname>
            <given-names>KK</given-names>
          </name>
          <name name-style="western">
            <surname>Rybicki</surname>
            <given-names>FJ</given-names>
          </name>
          <name name-style="western">
            <surname>Mitsouras</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Natural language processing technologies in radiology research and clinical applications</article-title>
        <source>Radiographics</source>  
        <year>2016</year>  
        <volume>36</volume>  
        <issue>1</issue>  
        <fpage>176</fpage>  
        <lpage>91</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26761536"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1148/rg.2016150080</pub-id>
        <pub-id pub-id-type="medline">26761536</pub-id>
        <pub-id pub-id-type="pmcid">PMC4734053</pub-id></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>American Medical Association</collab>
        </person-group>
        <source>AMA-ASSN</source>  
        <access-date>2013-02-15</access-date>
        <comment>CPT - Current Procedural Terminology 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.ama-assn.org/ama/pub/physician-resources/solutions-managing-your-practice/coding-billing-insurance/cpt.page">http://www.ama-assn.org/ama/pub/physician-resources/solutions-managing-your-practice/coding-billing-insurance/cpt.page</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6ESKL24Ap"/></comment> </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
        <source>DE-IDATA</source>  
        <access-date>2016-10-25</access-date>
        <comment>DE-ID Software 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.de-idata.com/">http://www.de-idata.com/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6lWrAN6St"/></comment> </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
        <source>Apache</source>  
        <access-date>2016-10-25</access-date>
        <comment>UIMA (Unstructured Information Management Architecture) 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://uima.apache.org/">http://uima.apache.org/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6lWqpTAtM"/></comment> </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Meystre</surname>
            <given-names>SM</given-names>
          </name>
          <name name-style="western">
            <surname>Thibault</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hurdle</surname>
            <given-names>JF</given-names>
          </name>
          <name name-style="western">
            <surname>South</surname>
            <given-names>BR</given-names>
          </name>
        </person-group>
        <article-title>Textractor: a hybrid system for medications and reason for their prescription extraction from clinical text documents</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2010</year>  
        <volume>17</volume>  
        <issue>5</issue>  
        <fpage>559</fpage>  
        <lpage>62</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=20819864"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/jamia.2010.004028</pub-id>
        <pub-id pub-id-type="medline">20819864</pub-id>
        <pub-id pub-id-type="pii">17/5/559</pub-id>
        <pub-id pub-id-type="pmcid">PMC2995680</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
        <source>Apache</source>  
        <access-date>2014-01-01</access-date>
        <comment>Welcome to Apache OpenNLP 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://opennlp.apache.org">http://opennlp.apache.org</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6MIJrWJJY"/></comment> </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Friedman</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Cimino</surname>
            <given-names>JJ</given-names>
          </name>
          <name name-style="western">
            <surname>Johnson</surname>
            <given-names>SB</given-names>
          </name>
        </person-group>
        <article-title>A conceptual model for clinical radiology reports</article-title>
        <source>Proc Annu Symp Comput Appl Med Care</source>  
        <year>1993</year>  
        <fpage>829</fpage>  
        <lpage>33</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/8130594"/>
        </comment>  
        <pub-id pub-id-type="medline">8130594</pub-id>
        <pub-id pub-id-type="pmcid">PMC2850685</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Chu</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Dowling</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>ConText: an algorithm for identifying contextual features from clinical text</article-title>
        <year>2007</year>  
        <conf-name>BioNLP '07 Proceedings of the Workshop on BioNLP 2007</conf-name>
        <conf-date>June 29, 2007</conf-date>
        <conf-loc>Prague, Czech Republic</conf-loc>
        <fpage>81</fpage>  
        <lpage>8</lpage> </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>C-C</given-names>
          </name>
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>C-J</given-names>
          </name>
        </person-group>
        <source>NTU CSIE</source>  
        <access-date>2016-10-25</access-date>
        <comment>LIBSVM: a library for support vector machines 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.csie.ntu.edu.tw/~cjlin/libsvm/">https://www.csie.ntu.edu.tw/~cjlin/libsvm/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6lWr1s08H"/></comment> </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
        <source>Waikato</source>  
        <access-date>2017-04-14</access-date>
        <comment>Weka 3: Data Mining Software in Java 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.cs.waikato.ac.nz/ml/weka/">http://www.cs.waikato.ac.nz/ml/weka/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6piyFTt5G"/></comment> </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>van Rijsbergen</surname>
            <given-names>CJ</given-names>
          </name>
        </person-group>
        <source>Openlib</source>  
        <year>1979</year>  
        <access-date>2017-04-26</access-date>
        <comment>Information retrieval 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://openlib.org/home/krichel/courses/lis618/readings/rijsbergen79_infor_retriev.pdf">http://openlib.org/home/krichel/courses/lis618/readings/rijsbergen79_infor_retriev.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6q0RSjrif"/></comment> </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Platt</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Fast training of support vector machines using sequential minimal optimization</article-title>
        <source>Advances in kernel methods</source>  
        <year>1999</year>  
        <publisher-loc>Cambridge, MA</publisher-loc>
        <publisher-name>MIT Press</publisher-name>
        <fpage>185</fpage>  
        <lpage>210</lpage> </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Quinlan</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>C4.5: programs for machine learning</source>  
        <year>1993</year>  
        <publisher-loc>San Francisco, CA</publisher-loc>
        <publisher-name>Morgan Kaufmann Publishers Inc</publisher-name></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cohen</surname>
            <given-names>WW</given-names>
          </name>
        </person-group>
        <article-title>Fast Effective Rule Induction</article-title>
        <year>1995</year>  
        <conf-name>Proceedings of the Twelfth International Conference on Machine Learning</conf-name>
        <conf-date>1995</conf-date>
        <conf-loc>Tahoe City, CA</conf-loc>
        <fpage>115</fpage>  
        <lpage>23</lpage> </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>John</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Langley</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Estimating continuous distributions in Bayesian classifiers</article-title>
        <year>1995</year>  
        <conf-name>UAI'95 Proceedings of the Eleventh conference on Uncertainty in artificial intelligence</conf-name>
        <conf-date>August 18-20, 1995</conf-date>
        <conf-loc>Montreal, Canada</conf-loc>
        <fpage>338</fpage>  
        <lpage>45</lpage> </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cooper</surname>
            <given-names>GF</given-names>
          </name>
          <name name-style="western">
            <surname>Herskovits</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>A Bayesian method for the induction of probabilistic networks from data</article-title>
        <source>Machine Learning</source>  
        <year>1992</year>  
        <month>10</month>  
        <volume>9</volume>  
        <issue>4</issue>  
        <fpage>309</fpage>  
        <lpage>47</lpage> </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Fiszman</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Haug</surname>
            <given-names>PJ</given-names>
          </name>
        </person-group>
        <article-title>Using medical language processing to support real-time evaluation of pneumonia guidelines</article-title>
        <source>Proc AMIA Symp</source>  
        <year>2000</year>  
        <fpage>235</fpage>  
        <lpage>9</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/11079880"/>
        </comment>  
        <pub-id pub-id-type="medline">11079880</pub-id>
        <pub-id pub-id-type="pii">D200363</pub-id>
        <pub-id pub-id-type="pmcid">PMC2244071</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
