<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v19i11e380</article-id>
    <article-id pub-id-type="pmid">29109070</article-id>
    <article-id pub-id-type="doi">10.2196/jmir.8344</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Artificial Intelligence Learning Semantics via External Resources for Classifying Diagnosis Codes in Discharge Notes</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Tao</surname>
          <given-names>Shiqiang</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Robinson</surname>
          <given-names>Robert</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Chu</surname>
          <given-names>Chi-Ming</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1">
        <name name-style="western">
          <surname>Lin</surname>
          <given-names>Chin</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-2337-2096</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib2">
        <name name-style="western">
          <surname>Hsu</surname>
          <given-names>Chia-Jung</given-names>
        </name>
        <degrees>BS</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9969-4855</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Lou</surname>
          <given-names>Yu-Sheng</given-names>
        </name>
        <degrees>BS</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9115-2656</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Yeh</surname>
          <given-names>Shih-Jen</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-5393-3996</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5" equal-contrib="yes">
        <name name-style="western">
          <surname>Lee</surname>
          <given-names>Chia-Cheng</given-names>
        </name>
        <degrees>MD</degrees>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7450-504X</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib6" equal-contrib="yes">
        <name name-style="western">
          <surname>Su</surname>
          <given-names>Sui-Lung</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-3122-1116</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib7" corresp="yes" equal-contrib="yes">
      <name name-style="western">
        <surname>Chen</surname>
        <given-names>Hsiang-Cheng</given-names>
      </name>
      <degrees>MD, PhD</degrees>
      <xref rid="aff5" ref-type="aff">5</xref>
      <address>
        <institution>Division of Rheumatology/Immunology/Allergy, Department of Internal Medicine</institution>
        <institution>Tri-Service General Hospital</institution>
        <institution>National Defense Medical Center</institution>
        <addr-line>No.161, Min-Chun E. Rd., Sec. 6, Neihu</addr-line>
        <addr-line>Taipei, 114</addr-line>
        <country>Taiwan</country>
        <phone>886 2 7927135</phone>
        <email>hccheng@ndmctsgh.edu.tw</email>
      </address>  
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-0753-6161</ext-link></contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>School of Public Health</institution>
    <institution>National Defense Medical Center</institution>  
    <addr-line>Taipei</addr-line>
    <country>Taiwan</country></aff>
    <aff id="aff2">
    <sup>2</sup>
    <institution>Department of Research and Development</institution>
    <institution>National Defense Medical Center</institution>  
    <addr-line>Taipei</addr-line>
    <country>Taiwan</country></aff>
    <aff id="aff3">
    <sup>3</sup>
    <institution>Planning and Management Office</institution>
    <institution>Tri-Service General Hospital</institution>  
    <institution>National Defense Medical Center</institution>  
    <addr-line>Taipei</addr-line>
    <country>Taiwan</country></aff>
    <aff id="aff4">
      <sup>4</sup>
      <institution>Da-Yeh University</institution>
      <addr-line>Changhua</addr-line>
      <country>Taiwan</country>
    </aff>
    <aff id="aff5">
    <sup>5</sup>
    <institution>Division of Rheumatology/Immunology/Allergy, Department of Internal Medicine</institution>
    <institution>Tri-Service General Hospital</institution>  
    <institution>National Defense Medical Center</institution>  
    <addr-line>Taipei</addr-line>
    <country>Taiwan</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Hsiang-Cheng Chen 
      <email>hccheng@ndmctsgh.edu.tw</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><month>11</month><year>2017</year></pub-date>
    <pub-date pub-type="epub">
      <day>06</day>
      <month>11</month>
      <year>2017</year>
    </pub-date>
    <volume>19</volume>
    <issue>11</issue>
    <elocation-id>e380</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>20</day>
        <month>7</month>
        <year>2017</year>
      </date>
      <date date-type="rev-request">
        <day>7</day>
        <month>9</month>
        <year>2017</year>
      </date>
      <date date-type="rev-recd">
        <day>25</day>
        <month>9</month>
        <year>2017</year>
      </date>
      <date date-type="accepted">
        <day>4</day>
        <month>10</month>
        <year>2017</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Chin Lin, Chia-Jung Hsu, Yu-Sheng Lou, Shih-Jen Yeh, Chia-Cheng Lee, Sui-Lung Su, Hsiang-Cheng Chen. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 06.11.2017.</copyright-statement>
    <copyright-year>2017</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://www.jmir.org/2017/11/e380/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>Automated disease code classification using free-text medical information is important for public health surveillance. However, traditional natural language processing (NLP) pipelines are limited, so we propose a method combining word embedding with a convolutional neural network (CNN).</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>Our objective was to compare the performance of traditional pipelines (NLP plus supervised machine learning models) with that of word embedding combined with a CNN in conducting a classification task identifying <italic>International Classification of Diseases, Tenth Revision, Clinical Modification</italic> (<italic>ICD-10-CM</italic>) diagnosis codes in discharge notes.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We used 2 classification methods: (1) extracting from discharge notes some features (terms, n-gram phrases, and SNOMED CT categories) that we used to train a set of supervised machine learning models (support vector machine, random forests, and gradient boosting machine), and (2) building a feature matrix, by a pretrained word embedding model, that we used to train a CNN. We used these methods to identify the chapter-level <italic>ICD-10-CM</italic> diagnosis codes in a set of discharge notes. We conducted the evaluation using 103,390 discharge notes covering patients hospitalized from June 1, 2015 to January 31, 2017 in the Tri-Service General Hospital in Taipei, Taiwan. We used the receiver operating characteristic curve as an evaluation measure, and calculated the area under the curve (AUC) and F-measure as the global measure of effectiveness.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>In 5-fold cross-validation tests, our method had a higher testing accuracy (mean AUC 0.9696; mean F-measure 0.9086) than traditional NLP-based approaches (mean AUC range 0.8183-0.9571; mean F-measure range 0.5050-0.8739). A real-world simulation that split the training sample and the testing sample by date verified this result (mean AUC 0.9645; mean F-measure 0.9003 using the proposed method). Further analysis showed that the convolutional layers of the CNN effectively identified a large number of keywords and automatically extracted enough concepts to predict the diagnosis codes.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>Word embedding combined with a CNN showed outstanding performance compared with traditional methods, needing very little data preprocessing. This shows that future studies will not be limited by incomplete dictionaries. A large amount of unstructured information from free-text medical writing will be extracted by automated approaches in the future, and we believe that the health care field is about to enter the age of big data.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>word embedding</kwd>
      <kwd>convolutional neural network</kwd>
      <kwd>neural networks (computer)</kwd>
      <kwd>natural language processing</kwd>
      <kwd>text mining</kwd>
      <kwd>data mining</kwd>
      <kwd>machine learning</kwd>
      <kwd>electronic medical records</kwd>
      <kwd>electronic health records</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Public health surveillance systems are important for identifying unusual events of public health importance and will provide information for public health action [<xref ref-type="bibr" rid="ref1">1</xref>]. However, most surveillance systems can only use structured data, such as <italic>International Classification of Diseases, Tenth Revision, Clinical Modification</italic> (<italic>ICD-10-CM</italic>) diagnosis codes. The current methods for collecting this structured information usually involve manual identification, but manual identification of disease codes from free-text clinical narratives is laborious and costly. Moreover, most surveillance systems do not have enough expert clinical coders for real-time surveillance, and this leads to delays in the release of disease statistics. Government health administrators need timely information to rapidly assess disease prevention and health protection priorities. A timely and computer-based disease classification approach is required to further assist public health action.</p>
      <p>Automated surveillance methods are increasingly being researched because of the increasing volume and accessibility of electronic medical data, and a range of studies have proven the feasibility of extracting structured information from clinical narratives [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. Previous studies suggested that these text mining approaches would need to effectively deal with the idiosyncrasies of the clinical sublanguage to further improve performance [<xref ref-type="bibr" rid="ref7">7</xref>]. However, compiling a complete medical dictionary may be impossible because of the variability of clinical vocabularies. Moreover, traditional natural language processing (NLP) pipelines can deal with synonyms but not similar terms, so supervised machine learning models often face the curse of dimensionality. For example, if we only want to identify infectious disease-related medical documents, bacteria names such as <italic>Streptococcus pneumoniae</italic> and <italic>Mycobacterium tuberculosis</italic> can actually be treated as similar for classification purposes. An effective text preprocessing approach would need to learn how to combine similar concepts, and current NLP pipelines often cannot deal with this issue.</p>
      <p>Another important challenge for automated surveillance algorithms is emerging disease. For example, influenza H1N1 broke out in 2009 and could not have been recorded in any medical records before 2008. Traditional automatic methods based on term vectors cannot use new terms [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. This weakness means that traditional methods cannot possibly implement a fully automated pipeline. The key reason that human experts can successfully identify emerging diseases is that humans can learn semantics from external resources. Traditionally, these external resources usually take the form of a dictionary, and this is what will be used in the NLP pipeline. However, dictionary construction is laborious, and it is still difficult to completely include all semantic relationships. In summary, traditional NLP pipelines are complex and inefficient, and successful automated surveillance methods will also need to include automatic handling of semantics.</p>
      <p>Word embedding is a feature learning technique where vocabularies are mapped to vectors of real numbers [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Word2vec [<xref ref-type="bibr" rid="ref10">10</xref>] and GloVe [<xref ref-type="bibr" rid="ref11">11</xref>] are the 2 most popular word embedding algorithms. These methods showcase interesting linear substructures in the word vector space: word vectors for similar concepts are likewise close in terms of cosine similarity and Euclidean distance. This property may help us identify concept groups and reduce the data dimensionality in future machine learning algorithms. However, clinical narratives will be transformed into a matrix, and standardization to vectors with different length is difficult for general machine learning models. Convolutional neural networks (CNNs) use layers with convolving filters that are applied to local features, and they can handle matrix input [<xref ref-type="bibr" rid="ref12">12</xref>]. CNNs were originally invented for computer vision applications and have subsequently been shown to achieve excellent results in semantic parsing [<xref ref-type="bibr" rid="ref13">13</xref>], search query retrieval [<xref ref-type="bibr" rid="ref14">14</xref>], and sentence classification [<xref ref-type="bibr" rid="ref15">15</xref>]. The key reason for the success of CNNs is their fuzzy matching using convolving filters, and we believe that convolving filters are a great way to process similar texts involving the same concepts. A lot of words and phrases that are conceptually similar can be combined in a convolving filter via fuzzy matching technology, thereby reducing the data dimensionality and avoiding overfitting.</p>
      <p>This project aimed to compare traditional machine learning pipelines (NLP plus supervised machine learning models) versus word embedding combined with a CNN in order to identify chapter-level <italic>ICD-10-CM</italic> diagnosis codes in discharge notes. We hoped to develop an efficient and effective real-time surveillance pipeline for disease statistics. In addition, we further analyzed the convolving filters of the CNN to understand their functions.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source</title>
        <p>The Tri-Service General Hospital, Taipei, Taiwan, supplied deidentified free-text discharge notes from June 1, 2015 to January 31, 2017. Research ethics approval was given by the institutional ethical committee and medical records office of the Tri-Service General Hospital to collect data without individual consent for sites where data are directly collected. The Tri-Service General Hospital is located in the Neihu District of Taipei under the name of National Defense Medical Center and provides medical service for service members, their family dependents, and civilians. It has been rated by the Ministry of Health and Welfare in Taiwan as a first-rate teaching hospital on the level of a medical center. The hospital has about 1700 beds and 6000 inpatients per month, and most inpatients are civilians. We collected a total of 103,390 discharge notes, and corrected misspellings using the Hunspell version 2.3 package [<xref ref-type="bibr" rid="ref16">16</xref>] and a dictionary built using English Wikipedia and Gigaword [<xref ref-type="bibr" rid="ref17">17</xref>]. <italic>ICD-10-CM</italic> codes had been used to label these discharge notes for the purpose of requesting health insurance fees, and the medical records department was responsible for their correctness. The Taiwan National Health Insurance Administration routinely samples a certain number of discharge notes for verification, and a wrongly labeled discharge note is punishable by a 10- to 20-fold fine. Discharge notes are often labeled with multiple <italic>ICD-10-CM</italic> codes, and all <italic>ICD-10-CM</italic> codes were truncated at the 1-character level. There are a total of 21 categories in the 2017 version. <xref ref-type="table" rid="table1">Table 1</xref> shows the frequency distribution of 1-character-level codes. Neoplasms and diseases of the circulatory system were the most common <italic>ICD-10-CM</italic> codes in our hospital.</p>
        <p>We used 2 testing procedures to assess the performance of the model. First, we conducted a 5-fold cross-validation test. Second, we created training and testing sets by splitting the sample by date (July 1, 2016), because this is more realistic. A classifier can only be trained using retrospective data in the real world, and it will be used to classify future data; the second testing process replicates this. All calculations were conducted on a Fujitsu RX2540M1 48-core CPU, 768 GB RAM server (Fujitsu Ltd, Tokyo, Japan), and the all-flash array was AccelStor NeoSapphire NS3505 (AccelStor, Inc, Taipei City, Taiwan) with a 5 TB serial advanced technology attachment-interface solid-state drive and connectivity of 56 Gb/second FDR InfiniBand Quad Small Form-factor Pluggable (Fiberon Technologies, Inc, Westborough, MA, USA).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Prevalence of different <italic>International Classification of Diseases, Tenth Revision, Clinical Modification</italic> (<italic>ICD-10-CM</italic>) chapter-level codes in discharge notes from the Tri-Service General Hospital, Taipei, Taiwan.</p>
          </caption>
          <table width="1050" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="350"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td rowspan="2"><italic>ICD-10-CM</italic> code</td>
                <td rowspan="2">Definition</td>
                <td colspan="3">Stage of the study</td>
              </tr>
              <tr valign="top">
                <td>Before June 30, 2016 (n=64,023) <break/>n (%)</td>
                <td>After July 1, 2016 (n=39,367) <break/>n (%)</td>
                <td>Full study period (n=103,390) <break/>n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>A00-B99</td>
                <td>Certain infectious and parasitic diseases</td>
                <td>7731 (12.1%)</td>
                <td>5455 (13.9%)</td>
                <td>13,186 (12.8%)</td>
              </tr>
              <tr valign="top">
                <td>C00-D49</td>
                <td>Neoplasms</td>
                <td>20,585 (32.2%)</td>
                <td>13,993 (35.5%)</td>
                <td>34,578 (33.5%)</td>
              </tr>
              <tr valign="top">
                <td>D50-D89</td>
                <td>Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism</td>
                <td>4516 (7.1%)</td>
                <td>3132 (8.0%)</td>
                <td>7648 (7.4%)</td>
              </tr>
              <tr valign="top">
                <td>E00-E89</td>
                <td>Endocrine, nutritional, and metabolic diseases</td>
                <td>13,223 (20.7%)</td>
                <td>8765 (22.3%)</td>
                <td>21,988 (21.3%)</td>
              </tr>
              <tr valign="top">
                <td>F01-F99</td>
                <td>Mental, behavioral, and neurodevelopmental disorders</td>
                <td>4612 (7.2%)</td>
                <td>2942 (7.5%)</td>
                <td>7554 (7.3%)</td>
              </tr>
              <tr valign="top">
                <td>G00-G99</td>
                <td>Diseases of the nervous system</td>
                <td>3703 (5.8%)</td>
                <td>2602 (6.6%)</td>
                <td>6305 (6.1%)</td>
              </tr>
              <tr valign="top">
                <td>H00-H59</td>
                <td>Diseases of the eye and adnexa</td>
                <td>2337 (3.7%)</td>
                <td>1374 (3.5%)</td>
                <td>3711 (3.6%)</td>
              </tr>
              <tr valign="top">
                <td>H60-H95</td>
                <td>Diseases of the ear and mastoid process</td>
                <td>802 (1.3%)</td>
                <td>470 (1.2%)</td>
                <td>1272 (1.2%)</td>
              </tr>
              <tr valign="top">
                <td>I00-I99</td>
                <td>Diseases of the circulatory system</td>
                <td>17,650 (27.6%)</td>
                <td>11,465 (29.1%)</td>
                <td>29,115 (28.2%)</td>
              </tr>
              <tr valign="top">
                <td>J00-J99</td>
                <td>Diseases of the respiratory system</td>
                <td>7743 (12.1%)</td>
                <td>5584 (14.2%)</td>
                <td>13,327 (13.0%)</td>
              </tr>
              <tr valign="top">
                <td>K00-K95</td>
                <td>Diseases of the digestive system</td>
                <td>12,849 (20.1%)</td>
                <td>8444 (21.4%)</td>
                <td>21,293 (20.6%)</td>
              </tr>
              <tr valign="top">
                <td>L00-L99</td>
                <td>Diseases of the skin and subcutaneous tissue</td>
                <td>2568 (4.0%)</td>
                <td>1711 (4.3%)</td>
                <td>4279 (4.1%)</td>
              </tr>
              <tr valign="top">
                <td>M00-M99</td>
                <td>Diseases of the musculoskeletal system and connective tissue</td>
                <td>9170 (14.3%)</td>
                <td>5152 (13.1%)</td>
                <td>14,322 (13.9%)</td>
              </tr>
              <tr valign="top">
                <td>N00-N99</td>
                <td>Diseases of the genitourinary system</td>
                <td>9929 (15.5%)</td>
                <td>7325 (18.6%)</td>
                <td>17,254 (16.8%)</td>
              </tr>
              <tr valign="top">
                <td>O00-O9A</td>
                <td>Pregnancy, childbirth, and the puerperium</td>
                <td>2509 (3.9%)</td>
                <td>1271 (3.2%)</td>
                <td>3780 (3.7%)</td>
              </tr>
              <tr valign="top">
                <td>P00-P96</td>
                <td>Certain conditions originating in the perinatal period</td>
                <td>793 (1.2%)</td>
                <td>493 (1.3%)</td>
                <td>1286 (1.2%)</td>
              </tr>
              <tr valign="top">
                <td>Q00-Q99</td>
                <td>Congenital malformations, deformations, and chromosomal abnormalities</td>
                <td>927 (1.4%)</td>
                <td>513 (1.3%)</td>
                <td>1440 (1.4%)</td>
              </tr>
              <tr valign="top">
                <td>R00-R99</td>
                <td>Symptoms, signs, and abnormal clinical and laboratory findings, not elsewhere classified</td>
                <td>5271 (8.2%)</td>
                <td>3824 (9.7%)</td>
                <td>9095 (8.9%)</td>
              </tr>
              <tr valign="top">
                <td>S00-T88</td>
                <td>Injury, poisoning, and certain other consequences of external causes</td>
                <td>6272 (9.8%)</td>
                <td>4564 (11.6%)</td>
                <td>10,836 (10.6%)</td>
              </tr>
              <tr valign="top">
                <td>V00-Y99</td>
                <td>External causes of morbidity</td>
                <td>791 (1.2%)</td>
                <td>68 (0.2%)</td>
                <td>859 (0.8%)</td>
              </tr>
              <tr valign="top">
                <td>Z00-Z99</td>
                <td>Factors influencing health status and contact with health services</td>
                <td>15,488 (24.2%)</td>
                <td>10,093 (25.6%)</td>
                <td>25,581 (24.8%)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Traditional Free-Text Classification Techniques</title>
        <p>Traditional classification techniques often combine an NLP pipeline and a classifier to conduct free-text medical writing classification tasks. We extracted the detailed features from the discharge notes by the NLP pipeline; then <italic>ICD-10-CM</italic> codes were assigned by human experts to each discharge note. We used the labeled features to train a classifier, and we used the well-trained model to predict the unlabeled testing data.</p>
        <p>In this study, we used a 2-part NLP pipeline to extract the discharge note features. First, word-based features were directly extracted from the free-text description and n-gram phrases (n range 2-5) were generated by the RWeka version 0.4-30 package [<xref ref-type="bibr" rid="ref18">18</xref>]. To reduce the complexity of the data, we only included n-gram phrases with counts &#62;10. Second, we used SNOMED CT International Edition version 20170131 categories to integrate synonyms. We used the bag-of-words model to vectorize the extracted features (1 vector per discharge note) and transformed these feature vectors into a document-term matrix using the tm version 0.7 package [<xref ref-type="bibr" rid="ref19">19</xref>]. This matrix was then the input into the following machine learning models.</p>
        <sec>
          <title>Support Vector Machine</title>
          <p>Support vector machines (SVMs) are common classifiers in the machine learning field. They map all samples onto a hyperplane and divide them by a clear gap. In addition, kernel tricks are used to extend this hyperplane. SVMs are proven to have the best performance in free-text medical writing classification, compared with naive Bayes classifiers, C4.5 decision trees, and adaptive boosting [<xref ref-type="bibr" rid="ref20">20</xref>]. In this study, we used the 4 most common kernel tricks: linear, polynomial (degree=3), radial basis, and sigmoid. We used the e1071 package (R package version 1.6-8) [<xref ref-type="bibr" rid="ref21">21</xref>] as the SVM implementation and set all other parameters to their default values.</p>
        </sec>
        <sec>
          <title>Random Forest</title>
          <p>Random forests (RFs) construct multiple decision trees and use information from each tree to make predictions. It was the best-performing classification model in a previous text classification study [<xref ref-type="bibr" rid="ref22">22</xref>], compared with SVMs, naive Bayes classifiers, and the k-nearest neighbors algorithm. We used the H2O version 3.10.2.2 package [<xref ref-type="bibr" rid="ref23">23</xref>] as the RF implementation and set all parameters to their default values.</p>
        </sec>
        <sec>
          <title>Gradient Boosting Machine</title>
          <p>Gradient boosting machines (GBMs) are also ensembles of weak decision trees, where the gradient boosting method is used to improve the predictive ability of each tree [<xref ref-type="bibr" rid="ref24">24</xref>]. They use greedy function approximation to build a series of weak trees [<xref ref-type="bibr" rid="ref25">25</xref>]. The H2O package also provides the function for the GBM implementation, and we set all parameters to their default values.</p>
          <p>Using the “no free lunch” theorem [<xref ref-type="bibr" rid="ref26">26</xref>], we combined a traditional NLP pipeline with the 3 abovementioned models and tested their performance on our task.</p>
        </sec>
      </sec>
      <sec>
        <title>Word Embedding Combined With a Convolutional Neural Network</title>
        <p>Traditional NLP pipelines are limited by their preexisting dictionary and need to build a complex processing flow. Herein, we propose a method combining a word embedding model and a CNN. Word embedding technology is useful for integrating synonyms, and we used a pretrained GloVe model (English Wikipedia plus Gigaword) to vectorize the words. We selected a 50-dimensional model with 400,000 words because of computing time constraints. However, we believe that this was sufficient because there were only 19,064 words in our 103,390 discharge notes. We transformed each discharge note into an n×50 matrix for subsequent classification (where n is the number of words in the discharge note) and trained a CNN using these labeled matrixes.</p>
        <p>Although CNNs with various structures have been developed, we focused on a 1-layer CNN with a filter region size of 1-5 (corresponding to 1-5 n-gram phrases) to increase comparability with traditional machine learning technologies. In fact, these simple models have recently achieved remarkably strong performance [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. <xref ref-type="fig" rid="figure1">Figure 1</xref> shows the proposed model’s architecture. We set 5 convolution channels, and their convolution layers were as follows: (1) 40 convolving filters with a 1×50 region size, to identify the important words; (2) 30 convolving filters with a 2×50 region size, to identify the important 2-gram phrases; (3) 15 convolving filters with a 3×50 region size, to identify the important 3-gram phrases; (4) 10 convolving filters with a 4×50 region size, to identify the important 4-gram phrases; and (5) 5 convolving filters with a 5×50 region size, to identify the important 5-gram phrases. These convolution layers were connected to a rectified linear unit layer to enhance the nonlinearity of the network. We then applied a max pooling layer over the feature map and took the maximum value. The above steps are similar to those of the keyword recognition process, and 100 features were extracted from each discharge note. To avoid the risk of overfitting, we used a dropout layer with a 50% drop rate after the convolution channels [<xref ref-type="bibr" rid="ref29">29</xref>]. Finally, we used logistic regression to connect the features, and the cross-entropy loss function in the loss layer to train the CNN.</p>
        <p>We used the MXNet version 0.8.0 package [<xref ref-type="bibr" rid="ref30">30</xref>] to implement the above architecture. The settings used for the training model were as follows: (1) minibatch gradient descent with a batch size of 1000 for optimization; (2) learning rate=.05; (3) momentum coefficient=.9; (4) L2 regularization coefficient=.00001; and (5) tolerance of early stopping per 100 iterations=.0001. <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref> shows example code for implementing the word embedding and CNNs for free-text discharge note classification.</p>

        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Model architecture with 5 convolution channels and 1 full connection (FC) layer. ReLU: rectified linear unit.</p>
          </caption>
          <graphic xlink:href="jmir_v19i11e380_fig1.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>

      </sec>
      <sec>
        <title>Model Details and Evaluation Index</title>
        <p>We conducted oversampling so that positive cases were sufficiently represented while the models were not skewed by an overwhelming number of negative cases [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. All the models return a continuous value to evaluate model performance. SVM-related models provide the decision values of the binary classifier; RF and GBM models provide the mean of the probabilities from the decision trees; and CNNs provide the probabilities calculated by the logistic function. We used the receiver operating characteristic curve as an evaluation measure, and the area under the curve (AUC) provided a global measure of effectiveness. Moreover, we provide the F-measure, which is calculated by the following equations: <italic>precision</italic> = <italic>TruePositives</italic> / (<italic>TruePositives</italic> + <italic>FalsePositives</italic>); <italic>recall</italic> = <italic>TruePositives</italic> / (<italic>TruePositives</italic> + <italic>FalseNegatives</italic>); <italic>F-measure</italic> = (2 × <italic>precision</italic> × <italic>recall</italic>) / (<italic>precision</italic> + <italic>recall</italic>).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Cross-Validation Test</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the global and lowest 5 means of the training and testing AUCs in the 5-fold cross-validation test. The proposed word embedding plus CNN method provided the highest AUCs (mean testing AUC = 0.9696; mean of the lowest 5 AUCs = 0.9135) and highest F-measures (mean testing F-measure = 0.9086; mean of the lowest 5 F-measures = 0.7651). It is worth noting that the SVM with the linear kernel trick had the highest mean testing AUC of all the traditional methods (mean testing AUC = 0.9571; mean of the lowest 5 AUCs = 0.8891). The performances of the RF, GBM, and linear SVM models were similar (mean testing AUCs of 0.9570, 0.9544, and 0.9571, respectively). However, the RF and GBM models were very inefficient in some tasks (as <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref> shows). The RF and GBM models had a lower mean testing AUC owing to the V00-Y99 <italic>ICD-10-CM</italic> code identification tasks; therefore, the linear SVM was a relatively stable model.</p>

        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Global (and lowest 5) means of training and testing AUCs<sup>a</sup> in the 5-fold cross-validation test.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Pipeline</td>
                <td colspan="2">Training set</td>
                <td colspan="2">Testing set</td>
              </tr>
              <tr valign="top">
                <td colspan="2"><break/></td>
                <td>AUC<sup>b</sup></td>
                <td>F-measure</td>
                <td>AUC<sup>b</sup></td>
                <td>F-measure</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6"><bold>Traditional</bold></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP<sup>c</sup> + SVM<sup>d</sup> (linear)</td>
                <td>0.9947 (0.9836)</td>
                <td>0.9546 (0.8560)</td>
                <td>0.9571 (0.8891)</td>
                <td>0.8606 (0.6387)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + SVM (polynomial)</td>
                <td>0.8627 (0.6736)</td>
                <td>0.5630 (0.2498)</td>
                <td>0.8183 (0.6332)</td>
                <td>0.5050 (0.2023)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + SVM (radial basis)</td>
                <td>0.9565 (0.9146)</td>
                <td>0.7984 (0.6613)</td>
                <td>0.9363 (0.8582)</td>
                <td>0.7569 (0.5352)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + SVM (sigmoid)</td>
                <td>0.9518 (0.9021)</td>
                <td>0.7852 (0.6368)</td>
                <td>0.9325 (0.8526)</td>
                <td>0.7498 (0.5313)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + RF<sup>e</sup></td>
                <td>0.9999 (0.9995)<sup>f</sup></td>
                <td>0.9864 (0.9628)</td>
                <td>0.9570 (0.8800)</td>
                <td>0.8739 (0.6475)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + GBM<sup>g</sup></td>
                <td>0.9996 (0.9990)</td>
                <td>0.9868 (0.9660)</td>
                <td>0.9544 (0.8722)</td>
                <td>0.8691 (0.6458)</td>
              </tr>
              <tr valign="top">
                <td colspan="6"><bold>Proposed</bold></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>GloVe<sup>h</sup> + CNN<sup>i</sup></td>
                <td>0.9964 (0.9890)</td>
                <td>0.9837 (0.9588)</td>
                <td>0.9696 (0.9135)<sup>f</sup></td>
                <td>0.9086 (0.7651)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>AUC: area under the curve, calculated using the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>The results are presented as the mean AUC or F-measure (mean of the lowest 5 AUCs or F-measures). Detailed AUCs and F-measures for each chapter-level <italic>International Classification of Diseases, Tenth Revision, Clinical Modification</italic> (<italic>ICD-10-CM</italic>) diagnosis code are shown in <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>NLP: natural language processing for feature extraction (terms, n-gram phrases, and SNOMED CT categories).</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>RF: random forest.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>The best method for a specific index.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>GBM: gradient boosting machine.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>GloVe: a 50-dimensional word embedding model, pretrained using English Wikipedia and Gigaword.</p>
            </fn>
            <fn id="table2fn9">
              <p><sup>i</sup>CNN: convolutional neural network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>

      </sec>
      <sec>
        <title>Real-World Test</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the global and lowest 5 means of the training and testing AUCs in the real-world test, where the testing samples were split by date. The results of this test were similar to those of the cross-validation test. The testing AUC in the real-world test was lower than that in the cross-validation test, possibly because the heterogeneity between the training and testing samples was higher in the real-world test owing to the presence of many cyclical diseases. However, our proposed method still had the highest performance on the testing set (mean testing AUC = 0.9645; mean testing F-measure = 0.9003; mean of the lowest 5 AUCs = 0.8952; mean of the lowest 5 F-measures = 0.7204) and achieved the best results in almost all tasks. <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref> shows the detailed training and testing AUCs. The testing AUC of the proposed method was clearly worse than that of the traditional methods only for the Q00-Q99 code identification tasks. In addition, the performances of all methods were poor for the V00-Y99 code identification tasks.</p>
      </sec>
      <sec>
        <title>Convolving Filter Analysis</title>
        <p>We visualized 3 of the convolving filters selected for the real-world test, as <xref ref-type="fig" rid="figure2">Figure 2</xref> shows. Neoplasms were the most common <italic>ICD-10-CM</italic> codes in our hospital, and we selected the filter with highest information gain for these. Information gain can be estimated as IG(<italic>C</italic>, <italic>F</italic>) = H(<italic>C</italic>) − H(<italic>C</italic> &#124; <italic>F</italic>), where <italic>C</italic> is the class (a specific <italic>ICD-10-CM</italic> code), <italic>F</italic> is the feature extracted by the convolving filter, and H is the information entropy function. This filter is a word filter that identified several cancer-related words, such as carcinoma and adenocarcinoma, when trained using the training data (<xref ref-type="fig" rid="figure2">Figure 2</xref>, panel A). As expected, these words, embodying similar concepts, were identified by the fuzzy matching technology. Moreover, the same words in the testing data were identified by this convolving filter (<xref ref-type="fig" rid="figure2">Figure 2</xref>, panel B). <xref ref-type="fig" rid="figure2">Figure 2</xref>, panel C shows a 2-gram convolving filter for certain infectious and parasitic diseases, which can identify many pathogens. It is worth mentioning that some pathogens absent in the training data were identified by this filter in the testing data (<xref ref-type="fig" rid="figure2">Figure 2</xref>, panel D). Identifying the external causes of morbidity was the most difficult task for all of the methods, and <xref ref-type="fig" rid="figure2">Figure 2</xref>, panel E shows the most important filter for this task. Some accident-related words were identified, such as fracture and injury, but these words were widely used in our discharge notes. The total number of discharge notes that included these words was 7855, but only 791 discharge notes were coded as V00-Y99 in the training set. 
This caused the information gain to be very low for the testing set (<xref ref-type="fig" rid="figure2">Figure 2</xref>, panel F).</p>
        <p><xref ref-type="fig" rid="figure3">Figure 3</xref> shows the information gain distribution of the convolving filters in each task, demonstrating large differences between them. The highest-performing classification tasks often extracted high information gain features using convolving filters. Moreover, when the geometric mean of the information gain ratio between the training and testing sets was over 80%, the testing AUC was more than 0.98. It is worth noting that the information gain ratio was very low for Q00-Q99 and V00-Y99 (19.9% and 0.9%, respectively). This may explain the lower performance in these tasks.</p>

        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Global (and lowest 5) means of the training and testing AUCs<sup>a</sup> in the real-world test.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Pipeline</td>
                <td colspan="2">Training set</td>
                <td colspan="2">Testing set</td>
              </tr>
              <tr valign="top">
                <td colspan="2"><break/></td>
                <td>AUC<sup>b</sup></td>
                <td>F-measure</td>
                <td>AUC<sup>b</sup></td>
                <td>F-measure</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6"><bold>Traditional</bold></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP<sup>c</sup> + SVM<sup>d</sup> (linear)</td>
                <td>0.9921 (0.9768)</td>
                <td>0.9365 (0.7983)</td>
                <td>0.9477 (0.8549)</td>
                <td>0.8458 (0.5984)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + SVM (polynomial)</td>
                <td>0.9103 (0.7975)</td>
                <td>0.6316 (0.4045)</td>
                <td>0.8716 (0.7400)</td>
                <td>0.5761 (0.2802)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + SVM (radial basis)</td>
                <td>0.9577 (0.9208)</td>
                <td>0.7954 (0.6484)</td>
                <td>0.9349 (0.8476)</td>
                <td>0.7588 (0.5258)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + SVM (sigmoid)</td>
                <td>0.9522 (0.9058)</td>
                <td>0.7840 (0.6261)</td>
                <td>0.9259 (0.8196)</td>
                <td>0.7515 (0.5209)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + RF<sup>e</sup></td>
                <td>0.9996 (0.9985)<sup>f</sup></td>
                <td>0.9869 (0.9664)<sup>f</sup></td>
                <td>0.9483 (0.8484)</td>
                <td>0.8582 (0.5901)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>NLP + GBM<sup>g</sup></td>
                <td>0.9995 (0.9985)</td>
                <td>0.9821 (0.9562)</td>
                <td>0.9462 (0.8416)</td>
                <td>0.8568 (0.5948)</td>
              </tr>
              <tr valign="top">
                <td colspan="6"><bold>Proposed</bold></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>GloVe<sup>h</sup> + CNN<sup>i</sup></td>
                <td>0.9956 (0.9868)</td>
                <td>0.9803 (0.9523)</td>
                <td>0.9645 (0.8952)<sup>f</sup></td>
                <td>0.9003 (0.7204)<sup>f</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>AUC: area under the curve, calculated using the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>The results are presented as the mean AUC or F-measure (mean of the lowest 5 AUCs or F-measures). Detailed AUCs and F-measures for each chapter-level <italic>International Classification of Diseases, Tenth Revision, Clinical Modification</italic> (<italic>ICD-10-CM</italic>) diagnosis code are shown in <xref ref-type="app" rid="app3">Multimedia Appendix 3</xref>.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>NLP: natural language processing for feature extraction (terms, n-gram phrases, and SNOMED CT categories).</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>RF: random forest.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>The best method for a specific index.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>GBM: gradient boosting machine.</p>
            </fn>
            <fn id="table3fn8">
              <p><sup>h</sup>GloVe: a 50-dimensional word embedding model, pretrained using English Wikipedia and Gigaword.</p>
            </fn>
            <fn id="table3fn9">
              <p><sup>i</sup>CNN: convolutional neural network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>

        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Visualization of selected convolving filters.</p>
          </caption>
          <graphic xlink:href="jmir_v19i11e380_fig2.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>

        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Information gains of the features extracted by the convolving filters in each classification task. AUC: area under the curve; IG: information gain.</p>
          </caption>
          <graphic xlink:href="jmir_v19i11e380_fig3.jpg" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>

      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The proposed method, which combines word embedding with a CNN, had a higher testing accuracy than all traditional NLP-based approaches, regardless of the situation. Further analysis showed that convolving filters had fuzzy matching abilities, which greatly reduced the data dimension for the final classification task. Moreover, the training AUCs of the traditional methods were very close to 1. This means that there was no room for further improvement on the training set, and the larger difference between training set and testing set performances implies overfitting.</p>
        <p>Arbitrary free-text medical narratives include many word combinations, and there is no good way of integrating similar terms using the current NLP pipelines. Previous studies have highlighted this issue and suggested that improvements are possible by dealing more effectively with the idiosyncrasies of the clinical sublanguage [<xref ref-type="bibr" rid="ref7">7</xref>]. We believe that our proposal has an advantage in this respect. The used fuzzy matching technology offers a real chance of reducing the risk of overfitting. This is not surprising, as CNNs have achieved excellent results in some text mining tasks [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. This study also demonstrated the advantages of using CNNs for free-text medical narrative classification.</p>
        <p>Our proposed method not only increased the accuracy compared with traditional methods but also avoided troublesome data preprocessing. Our solution for avoiding troublesome data preprocessing is based on word embedding, which can learn semantics from external resources. The vocabularies are mapped to vectors of real numbers, and the word vectors for similar concepts are likewise close. In our work, a discharge note is converted into an n×50 matrix, where n is the number of words, and the CNN classifies this matrix based on our designed convolving filters. Because the word vectors for similar concepts are close to each other, the convolutional layers effectively identified a large number of keywords within a convolving filter (data shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>). Finally, we used the document features extracted by these convolving filters to identify <italic>ICD-10-CM</italic> diagnosis codes. This simple idea effectively deals with the idiosyncrasies of the clinical sublanguage, so the proposed method does not require data preprocessing by external dictionaries.</p>
        <p>All the classifiers used in this study performed poorly on V00-Y99 (external causes of morbidity) coding tasks, which may be attributed to sparse testing data (0.2%). A previous study found that classifier performance was better on common cancers than on rare cancers [<xref ref-type="bibr" rid="ref2">2</xref>]. However, the performance of the proposed method was clearly better than that of traditional methods. The Q00-Q99 (congenital malformations, deformations, and chromosomal abnormalities) coding tasks were the next key point, as our method was obviously worse than traditional methods in these tasks. After further analysis, we found that the most common second-level <italic>ICD-10-CM</italic> diagnosis codes in Q00-Q99 are Q80-Q89 (other congenital malformations), and the words used in these discharge notes were really complex. This means that our CNN may have needed more convolving filters to handle this issue. After we doubled the number of filters and retrained the CNN, the testing AUC greatly improved (testing AUCs of 0.9203 and 0.9235 in the cross-validation test and the real-world test, respectively). Hence, although a simple 1-layer CNN has already shown outstanding performance in our experiments, we believe that there are many opportunities to improve the performance of the proposed model.</p>
        <p>All traditional term-based classifiers face the problem that emerging diseases cannot possibly be correctly classified. For example, influenza H1N1 could not possibly have been recorded in clinical narratives from 2000 to 2007, so term-based classifiers could not have been aware of the H1N1 pandemic of 2009 [<xref ref-type="bibr" rid="ref3">3</xref>]. Our method can handle this problem using fuzzy matching technology. Although H1N1 was not recorded in discharge notes from 2000 to 2007, there was enough information to allow the machine to understand that H1N1 was an influenza subtype. In our pretrained GloVe model, H1N1 was very close to some influenza-related terms, such as “swine,” “influenza,” “flu,” and “H5N1” (the cosine similarities were 0.835, 0.832, 0.831, and 0.716, respectively). Thus, we believe that convolving filters could still have correctly identified H1N1 and classified related discharge notes as A00-B99 (certain infectious and parasitic diseases), but more precise coding would have been difficult. Thus, retraining or incrementally updating the classifiers would still be necessary; otherwise, emerging diseases would be merged into similar disease categories. However, this is still an important breakthrough in the free-text medical writing classification task.</p>
        <p>Previous studies described the classification methods used by human experts, and several rule-based approaches have demonstrated superior performance [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. The only problem with rule-based approaches is that adding new diseases requires the development of new models and rules. RF models use an ensemble of decision trees, where each interior node is differentiated on the basis of 1 of the terms. We consider the similarity between RF and rule-based approaches to be higher than with the proposed CNN. The machine must imitate human behavior patterns to improve its correctness. The RF model showed better performance than traditional classifiers in most identification tasks (mean testing ranks of 3.000 and 3.190 in the cross-validation test and real-world test, respectively), possibly attributed to the RF model having a similar identification process to that of human experts. The proposed CNN architecture uses a logistic function for output, similar to a linear SVM, although nonlinear SVMs showed a lower training AUC, which may have been due to wrong assumptions about the relationship between features and the outcome. This evidence shows that the assumption of a linear relationship between extracted features and outcome is better than a nonlinear assumption, and the architecture of our CNN also follows this linear assumption in its last layer. However, rule-based approaches are more inclined to use positive terms than negative ones [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], so the architecture of RF or GBM is better than a linear classifier. The proposed CNN showed the highest accuracy; the key to success is not our network architecture but the fuzzy term matching technology. 
Fuzzy term matching reduces the hazard of overfitting, and the mean training AUCs for the RF and GBM models were higher than those for the other models, possibly indicating that overfitting is more risky in RF and GBM models. In summary, we consider that a deeper CNN may provide more accurate predictive ability. Further studies need to consider this to improve the performance of word embedding combined with a CNN.</p>
        <p>Outbreaks of deliberate and natural infectious disease can lead to massive casualties unless public health actions are promptly instituted [<xref ref-type="bibr" rid="ref34">34</xref>]. Thus, many countries have been building real-time infectious disease surveillance systems, such as the Real-time Outbreak and Disease Surveillance system [<xref ref-type="bibr" rid="ref35">35</xref>]. The implementation principle of the Real-time Outbreak and Disease Surveillance system is through the structured <italic>ICD</italic> code, and it needs real-time manual identification by emergency physicians. However, this system cannot be extended to all diseases because a lot of resources are required. In addition to infectious diseases, other chronic diseases also need to be surveilled in real time [<xref ref-type="bibr" rid="ref36">36</xref>]. Government health administrators need timely information to rapidly assess disease prevention and health protection priorities. A timely automated disease classification algorithm is required. Our proposed method provides a viable pipeline for implementing a disease surveillance system of all diseases. It not only improves classification performance but also avoids the inherent limitations of traditional methods. Subsequent studies can use this algorithm to further develop fully automated disease surveillance systems.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Several potential limitations of this study should be acknowledged. First, we used only a 50-dimensional GloVe model to process our data, to reduce computing time. However, even a 50-dimensional model has better performance than traditional methods. Thus, we believe that this will not affect our result and that our proposal is a better solution for conducting free-text medical narrative coding tasks. Second, this study included discharge notes from only a single hospital, so we cannot confirm how well it would generalize to other data sources. Although this study only provided a feasibility assessment for extrapolation over time, we believe that it still demonstrated the superiority of our method. Third, this study conducted the classification task only in discharge notes. Discharge notes describe only the presence of the disease, but do not include negative statements. Our CNN architecture includes 3- to 5-gram phrase identifiers, but further studies are still needed to apply this approach to patient progress notes to prove its ability.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Our study showed that combining CNNs with word embedding is a viable analysis pipeline for disease classification from free-text medical narratives. Moreover, it showed outstanding performance compared with traditional NLP employing machine learning classifiers and may avoid troublesome data preprocessing. More complex CNNs could be used to further improve predictive performance, and future studies will not be limited by incomplete dictionaries. Because our data were collected from a single center, further studies can implement this algorithm in other hospitals. We hope our experiment will lead to a range of studies toward developing more efficient automated classification approaches and that a large amount of unstructured information will be extracted from free-text medical writing. We have developed a Web app to demonstrate our work [<xref ref-type="bibr" rid="ref37">37</xref>]. Public health surveillance systems would become more efficient, and government health administrators would be able to take timely and correct action for disease prevention and health protection. When previously unlabeled clinical records are labeled using such an automated approach, we can obtain more data-driven clues to help promote the progress of medicine. The health care field will then truly enter the age of big data.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>ICD-10-CM diagnosis code tutorial.</p>
        <media xlink:href="jmir_v19i11e380_app1.pdf" xlink:title="PDF File (Adobe PDF File), 1MB"/>
      </app>
      <app id="app2">
        <title>Multimedia Appendix 2</title>
        <p>Detailed training and testing AUCs and F-measures for the 5-fold cross-validation test.</p>
        <media xlink:href="jmir_v19i11e380_app2.pdf" xlink:title="PDF File (Adobe PDF File), 54KB"/>
      </app>
      <app id="app3">
        <title>Multimedia Appendix 3</title>
        <p>Detailed training and testing AUCs and F-measures for the real-world test.</p>
        <media xlink:href="jmir_v19i11e380_app3.pdf" xlink:title="PDF File (Adobe PDF File), 54KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">GBM</term>
          <def>
            <p>gradient boosting machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ICD-10-CM</term>
          <def>
            <p>International Classification of Diseases, Tenth Revision, Clinical Modification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by the Smart Healthcare Project from the Medical Affairs Bureau Ministry of National Defense, Taiwan. Funding was supported by the Ministry of Science and Technology (105-2314-B-016-053) and Medical Affairs Bureau Ministry of National Defense (MAB-104-013). The authors appreciate the Medical Records Office at Tri-Service General Hospital for providing the unlinked data source.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>LM</given-names>
          </name>
          <name name-style="western">
            <surname>Thacker</surname>
            <given-names>SB</given-names>
          </name>
        </person-group>
        <article-title>Public health surveillance and knowing about health in the context of growing sources of health data</article-title>
        <source>Am J Prev Med</source>  
        <year>2011</year>  
        <month>12</month>  
        <volume>41</volume>  
        <issue>6</issue>  
        <fpage>636</fpage>  
        <lpage>40</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.amepre.2011.08.015</pub-id>
        <pub-id pub-id-type="medline">22099242</pub-id>
        <pub-id pub-id-type="pii">S0749-3797(11)00674-X</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Koopman</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Zuccon</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Bergheim</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Grayson</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Automatic ICD-10 classification of cancers from free-text death certificates</article-title>
        <source>Int J Med Inform</source>  
        <year>2015</year>  
        <month>11</month>  
        <volume>84</volume>  
        <issue>11</issue>  
        <fpage>956</fpage>  
        <lpage>65</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2015.08.004</pub-id>
        <pub-id pub-id-type="medline">26323193</pub-id>
        <pub-id pub-id-type="pii">S1386-5056(15)30028-9</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Koopman</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Karimi</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>McGuire</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Muscatello</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Kemp</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Truran</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Thackway</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Automatic classification of diseases from free-text death certificates for real-time surveillance</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2015</year>  
        <month>07</month>  
        <day>15</day>  
        <volume>15</volume>  
        <fpage>53</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-015-0174-2"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s12911-015-0174-2</pub-id>
        <pub-id pub-id-type="medline">26174442</pub-id>
        <pub-id pub-id-type="pii">10.1186/s12911-015-0174-2</pub-id>
        <pub-id pub-id-type="pmcid">PMC4502908</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Koopman</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Zuccon</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Wagholikar</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Chu</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>O'Dwyer</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Keijzers</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Automated reconciliation of radiology reports and discharge summaries</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2015</year>  
        <month>11</month>  
        <volume>2015</volume>  
        <fpage>775</fpage>  
        <lpage>84</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26958213"/>
        </comment>  
        <pub-id pub-id-type="medline">26958213</pub-id>
        <pub-id pub-id-type="pmcid">PMC4765582</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Khachidze</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Tsintsadze</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Archuadze</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Natural language processing based instrument for classification of free text medical records</article-title>
        <source>Biomed Res Int</source>  
        <year>2016</year>  
        <month>9</month>  
        <volume>2016</volume>  
        <fpage>8313454</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://dx.doi.org/10.1155/2016/8313454"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1155/2016/8313454</pub-id>
        <pub-id pub-id-type="medline">27668260</pub-id>
        <pub-id pub-id-type="pmcid">PMC5030470</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mujtaba</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Shuib</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Raj</surname>
            <given-names>RG</given-names>
          </name>
          <name name-style="western">
            <surname>Rajandram</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Shaikh</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Al-Garadi</surname>
            <given-names>MA</given-names>
          </name>
        </person-group>
        <article-title>Automatic ICD-10 multi-class classification of cause of death from plaintext autopsy reports through expert-driven feature selection</article-title>
        <source>PLoS One</source>  
        <year>2017</year>  
        <month>2</month>  
        <volume>12</volume>  
        <issue>2</issue>  
        <fpage>e0170242</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0170242"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0170242</pub-id>
        <pub-id pub-id-type="medline">28166263</pub-id>
        <pub-id pub-id-type="pii">PONE-D-16-33478</pub-id>
        <pub-id pub-id-type="pmcid">PMC5293233</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Spasić</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Livsey</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Keane</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Nenadić</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Text mining of cancer-related information: review of current status and future directions</article-title>
        <source>Int J Med Inform</source>  
        <year>2014</year>  
        <month>09</month>  
        <volume>83</volume>  
        <issue>9</issue>  
        <fpage>605</fpage>  
        <lpage>23</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(14)00110-5"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2014.06.009</pub-id>
        <pub-id pub-id-type="medline">25008281</pub-id>
        <pub-id pub-id-type="pii">S1386-5056(14)00110-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bengio</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Ducharme</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Vincent</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Jauvin</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>A neural probabilistic language model</article-title>
        <source>J Mach Learn Res</source>  
        <year>2003</year>  
        <month>2</month>  
        <volume>3</volume>  
        <fpage>1137</fpage>  
        <lpage>1155</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmlr.org/papers/v3/bengio03a.html"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yih</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Toutanova</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Platt</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Meek</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Learning discriminative projections for text similarity measures</article-title>
        <year>2011</year>  
        <month>7</month>  
        <conf-name>Fifteenth Conference on Computational Natural Language Learning</conf-name>
        <conf-date>Jun 23-24, 2011</conf-date>
        <conf-loc>Stroudsburg, PA, USA</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://dl.acm.org/citation.cfm?id=2018965"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mikolov</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Sutskever</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Corrado</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Dean</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Distributed representations of words and phrases and their compositionality</article-title>
        <year>2013</year>  
        <month>12</month>  
        <conf-name>2013 Conference on Advances in Neural Information Processing Systems</conf-name>
        <conf-date>Dec 9, 2013</conf-date>
        <conf-loc>Lake Tahoe, NV, USA</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pennington</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Socher</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Manning</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>GloVe: global vectors for word representation</article-title>
        <year>2014</year>  
        <month>10</month>  
        <conf-name>Conference on Empirical Methods in Natural Language Processing</conf-name>
        <conf-date>Oct 26-28, 2014</conf-date>
        <conf-loc>Doha, Qatar</conf-loc>
        <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lecun</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Bottou</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Bengio</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Haffner</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Gradient-based learning applied to document recognition</article-title>
        <source>Proc. IEEE</source>  
        <year>1998</year>  
        <month>11</month>  
        <volume>86</volume>  
        <issue>11</issue>  
        <fpage>2278</fpage>  
        <lpage>2324</lpage>  
        <pub-id pub-id-type="doi">10.1109/5.726791</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yih</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>He</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Meek</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Semantic parsing for single-relation question answering</article-title>
        <year>2014</year>  
        <month>7</month>  
        <conf-name>52nd Annual Meeting of the Association for Computational Linguistics</conf-name>
        <conf-date>Jun 23-25, 2014</conf-date>
        <conf-loc>Baltimore, MD, USA</conf-loc>
        <fpage>23</fpage>  
        <lpage>25</lpage> </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>He</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Gao</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Deng</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Mesnil</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Learning semantic representations using convolutional neural networks for web search</article-title>
        <year>2014</year>  
        <month>4</month>  
        <conf-name>23rd International Conference on World Wide Web</conf-name>
        <conf-date>Apr 7-11, 2014</conf-date>
        <conf-loc>Seoul, Korea</conf-loc>
        <fpage>7</fpage>  
        <lpage>11</lpage>  
        <pub-id pub-id-type="doi">10.1145/2567948.2577348</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Convolutional neural networks for sentence classification</article-title>
        <year>2014</year>  
        <month>10</month>  
        <conf-name>Conference on Empirical Methods in Natural Language Processing</conf-name>
        <conf-date>Oct 26-28, 2014</conf-date>
        <conf-loc>Doha, Qatar</conf-loc>
        <fpage>26</fpage>  
        <lpage>28</lpage>  
        <pub-id pub-id-type="doi">10.3115/v1/d14-1181</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hornik</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Murdoch</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Watch your spelling!</article-title>
        <source>The R Journal</source>  
        <year>2011</year>  
        <month>12</month>  
        <volume>3</volume>  
        <issue>2</issue>  
        <fpage>22</fpage>  
        <lpage>28</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://journal.r-project.org/archive/2011/RJ-2011-014/index.html"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Graff</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Cieri</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <source>English Gigaword LDC2003T05</source>  
        <year>2003</year>  
        <access-date>2017-10-30</access-date>
        <publisher-loc>Philadelphia, PA</publisher-loc>
        <publisher-name>Linguistic Data Consortium</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://catalog.ldc.upenn.edu/ldc2003t05">https://catalog.ldc.upenn.edu/ldc2003t05</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ubOmjzlO"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hornik</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Buchta</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Zeileis</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Open-source machine learning: R meets Weka</article-title>
        <source>Comput Stat</source>  
        <year>2008</year>  
        <month>5</month>  
        <day>14</day>  
        <volume>24</volume>  
        <issue>2</issue>  
        <fpage>225</fpage>  
        <lpage>232</lpage>  
        <pub-id pub-id-type="doi">10.1007/s00180-008-0119-7</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Feinerer</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Hornik</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Meyer</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Text mining infrastructure in R</article-title>
        <source>J Stat Softw</source>  
        <year>2008</year>  
        <month>3</month>  
        <volume>25</volume>  
        <issue>5</issue>  
        <fpage>1</fpage>  
        <lpage>54</lpage>  
        <pub-id pub-id-type="doi">10.18637/jss.v025.i05</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Butt</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Zuccon</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Bergheim</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Grayson</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Classification of cancer-related death certificates using machine learning</article-title>
        <source>Australas Med J</source>  
        <year>2013</year>  
        <month>3</month>  
        <volume>6</volume>  
        <issue>5</issue>  
        <fpage>292</fpage>  
        <lpage>299</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23745151"/>
        </comment>  
        <pub-id pub-id-type="doi">10.4066/AMJ.2013.1654</pub-id>
        <pub-id pub-id-type="medline">23745151</pub-id>
        <pub-id pub-id-type="pii">20131654</pub-id>
        <pub-id pub-id-type="pmcid">PMC3674421</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Meyer</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Dimitriadou</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Hornik</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Leisch</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Weingessel</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Leisch</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>CC</given-names>
          </name>
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>CC</given-names>
          </name>
        </person-group>
        <source>The Comprehensive R Archive Network</source>  
        <year>2017</year>  
        <month>02</month>  
        <access-date>2017-10-30</access-date>
        <comment>e1071: misc functions of the Department of Statistics, Probability Theory Group (formerly E1071), TU Wien 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://CRAN.R-project.org/package=e1071">https://CRAN.R-project.org/package=e1071</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6uaY1A0It"/></comment> </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Al-garadi</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Varathan</surname>
            <given-names>KD</given-names>
          </name>
          <name name-style="western">
            <surname>Ravana</surname>
            <given-names>SD</given-names>
          </name>
        </person-group>
        <article-title>Cybercrime detection in online communications: the experimental case of cyberbullying detection in the Twitter network</article-title>
        <source>Comput Hum Behav</source>  
        <year>2016</year>  
        <month>5</month>  
        <volume>63</volume>  
        <fpage>433</fpage>  
        <lpage>443</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.chb.2016.05.051</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Candel</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Lanford</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>LeDell</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Parmar</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Arora</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <source>Deep learning with H2O, 3rd edition</source>  
        <year>2015</year>  
        <month>8</month>  
        <access-date>2017-10-29</access-date>
        <publisher-loc>Mountain View, CA</publisher-loc>
        <publisher-name>H2O.ai, Inc</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.scribd.com/doc/312515027/Deep-Learning-with-H2O">https://www.scribd.com/doc/312515027/Deep-Learning-with-H2O</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6uaYPhzCU"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Natekin</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Knoll</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Gradient boosting machines, a tutorial</article-title>
        <source>Front Neurorobot</source>  
        <year>2013</year>  
        <month>12</month>  
        <volume>7</volume>  
        <fpage>21</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://dx.doi.org/10.3389/fnbot.2013.00021"/>
        </comment>  
        <pub-id pub-id-type="doi">10.3389/fnbot.2013.00021</pub-id>
        <pub-id pub-id-type="medline">24409142</pub-id>
        <pub-id pub-id-type="pmcid">PMC3885826</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Friedman</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Greedy function approximation: a gradient boosting machine</article-title>
        <source>Ann Stat</source>  
        <year>2001</year>  
        <volume>29</volume>  
        <issue>5</issue>  
        <fpage>1189</fpage>  
        <lpage>1232</lpage> </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wolpert</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Macready</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <article-title>No free lunch theorems for optimization</article-title>
        <source>IEEE Trans Evol Comput</source>  
        <year>1997</year>  
        <month>4</month>  
        <volume>1</volume>  
        <issue>1</issue>  
        <fpage>67</fpage>  
        <lpage>82</lpage>  
        <pub-id pub-id-type="doi">10.1109/4235.585893</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kalchbrenner</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Grefenstette</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Blunsom</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>A convolutional neural network for modelling sentences</article-title>
        <year>2014</year>  
        <month>6</month>  
        <conf-name>52nd Annual Meeting of the Association for Computational Linguistics</conf-name>
        <conf-date>Jun 23-25, 2014</conf-date>
        <conf-loc>Baltimore, MD, USA</conf-loc>
        <fpage>655</fpage>  
        <lpage>665</lpage>  
        <pub-id pub-id-type="doi">10.3115/v1/P14-1062</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Wallace</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <source>A sensitivity analysis of (and practitioners' guide to) convolutional neural networks for sentence classification. arXiv 2015: 1510.03820</source>  
        <year>2015</year>  
        <access-date>2017-10-30</access-date>
        <publisher-loc>Ithaca, NY</publisher-loc>
        <publisher-name>Cornell University Library</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/abs/1510.03820v1">https://arxiv.org/abs/1510.03820v1</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ubOIti8Q"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Srivastava</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Hinton</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Krizhevsky</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Sutskever</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Salakhutdinov</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Dropout: a simple way to prevent neural networks from overfitting</article-title>
        <source>J Mach Learn Res</source>  
        <year>2014</year>  
        <month>6</month>  
        <volume>15</volume>  
        <fpage>1929</fpage>  
        <lpage>1958</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jmlr.org/papers/v15/srivastava14a.html"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Xiao</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Z</given-names>
          </name>
        </person-group>
        <article-title>MXNet: a flexible and efficient machine learning library for heterogeneous distributed systems</article-title>
        <year>2016</year>  
        <conf-name>Neural Information Processing Systems</conf-name>
        <conf-date>Dec 10, 2016</conf-date>
        <conf-loc>Barcelona, Spain</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/abs/1512.01274"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chai</surname>
            <given-names>KEK</given-names>
          </name>
          <name name-style="western">
            <surname>Anthony</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Coiera</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Magrabi</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Using statistical text classification to identify health information technology incidents</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2013</year>  
        <month>9</month>  
        <volume>20</volume>  
        <issue>5</issue>  
        <fpage>980</fpage>  
        <lpage>985</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23666777"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2012-001409</pub-id>
        <pub-id pub-id-type="medline">23666777</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2012-001409</pub-id>
        <pub-id pub-id-type="pmcid">PMC3756261</pub-id></nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Simpson</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <source>Over-sampling in a deep neural network. arXiv 2015: 1502.03648</source>  
        <year>2015</year>  
        <access-date>2017-10-30</access-date>
        <publisher-loc>Ithaca, NY</publisher-loc>
        <publisher-name>Cornell University Library</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/abs/1502.03648">https://arxiv.org/abs/1502.03648</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6ubOQqAKk"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Muscatello</surname>
            <given-names>DJ</given-names>
          </name>
          <name name-style="western">
            <surname>Morton</surname>
            <given-names>PM</given-names>
          </name>
          <name name-style="western">
            <surname>Evans</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Gilmour</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Prospective surveillance of excess mortality due to influenza in New South Wales: feasibility and statistical approach</article-title>
        <source>Commun Dis Intell Q Rep</source>  
        <year>2008</year>  
        <month>12</month>  
        <volume>32</volume>  
        <issue>4</issue>  
        <fpage>435</fpage>  
        <lpage>42</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.health.gov.au/internet/main/publishing.nsf/Content/cda-cdi3204f.htm"/>
        </comment>  
        <pub-id pub-id-type="medline">19374272</pub-id></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dembek</surname>
            <given-names>ZF</given-names>
          </name>
          <name name-style="western">
            <surname>Kortepeter</surname>
            <given-names>MG</given-names>
          </name>
          <name name-style="western">
            <surname>Pavlin</surname>
            <given-names>JA</given-names>
          </name>
        </person-group>
        <article-title>Discernment between deliberate and natural infectious disease outbreaks</article-title>
        <source>Epidemiol Infect</source>  
        <year>2007</year>  
        <month>4</month>  
        <volume>135</volume>  
        <issue>3</issue>  
        <fpage>353</fpage>  
        <lpage>71</lpage>  
        <pub-id pub-id-type="doi">10.1017/S0950268806007011</pub-id>
        <pub-id pub-id-type="medline">16893485</pub-id>
        <pub-id pub-id-type="pii">S0950268806007011</pub-id>
        <pub-id pub-id-type="pmcid">PMC2870591</pub-id></nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Tsui</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Espino</surname>
            <given-names>JU</given-names>
          </name>
          <name name-style="western">
            <surname>Dato</surname>
            <given-names>VM</given-names>
          </name>
          <name name-style="western">
            <surname>Gesteland</surname>
            <given-names>PH</given-names>
          </name>
          <name name-style="western">
            <surname>Hutman</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Wagner</surname>
            <given-names>MM</given-names>
          </name>
        </person-group>
        <article-title>Technical description of RODS: a real-time public health surveillance system</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2003</year>  
        <month>9</month>  
        <volume>10</volume>  
        <issue>5</issue>  
        <fpage>399</fpage>  
        <lpage>408</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/12807803"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M1345</pub-id>
        <pub-id pub-id-type="medline">12807803</pub-id>
        <pub-id pub-id-type="pii">M1345</pub-id>
        <pub-id pub-id-type="pmcid">PMC212776</pub-id></nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mitra</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Hutchings</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Shaw</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Barber</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Sung</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Bernstein</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Carret</surname>
            <given-names>AS</given-names>
          </name>
          <name name-style="western">
            <surname>Barbaros</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>McBride</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Parker</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Stewart</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Strahlendorf</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Status Report – The Cancer in Young People in Canada surveillance system</article-title>
        <source>Health Promot Chronic Dis Prev Can</source>  
        <year>2015</year>  
        <month>6</month>  
        <volume>35</volume>  
        <issue>4</issue>  
        <fpage>73</fpage>  
        <lpage>6</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.phac-aspc.gc.ca/publicat/hpcdp-pspmc/35-4/ar-02-eng.php"/>
        </comment>  
        <pub-id pub-id-type="medline">26083522</pub-id>
        <pub-id pub-id-type="pmcid">PMC4910422</pub-id></nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <source>ICD-10 CM classificator</source>  
        <year>2017</year>  
        <access-date>2017-10-27</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://chin-lin.shinyapps.io/icd10/">https://chin-lin.shinyapps.io/icd10/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6uX5IBFWP"/>
        </comment> </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
