<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v20i7e236</article-id>
    <article-id pub-id-type="pmid">29986843</article-id>
    <article-id pub-id-type="doi">10.2196/jmir.9413</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Public Perception Analysis of Tweets During the 2015 Measles Outbreak: Comparative Study Using Convolutional Neural Network Models</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>He</surname>
          <given-names>Zhe</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Bian</surname>
          <given-names>Jiang</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Limsopatham</surname>
          <given-names>Nut</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" equal-contrib="yes">
        <name name-style="western">
          <surname>Du</surname>
          <given-names>Jingcheng</given-names>
        </name>
        <degrees>BS</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-0322-4566</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib2" equal-contrib="yes">
        <name name-style="western">
          <surname>Tang</surname>
          <given-names>Lu</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-1850-1511</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Xiang</surname>
          <given-names>Yang</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1395-6805</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Zhi</surname>
          <given-names>Degui</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7754-1890</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Xu</surname>
          <given-names>Jun</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-1823-3633</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib6">
        <name name-style="western">
          <surname>Song</surname>
          <given-names>Hsing-Yi</given-names>
        </name>
        <degrees>MPH</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-5463-3829</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib7" corresp="yes">
      <name name-style="western">
        <surname>Tao</surname>
        <given-names>Cui</given-names>
      </name>
      <degrees>PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>School of Biomedical Informatics</institution>
        <institution>The University of Texas Health Science Center at Houston</institution>
        <addr-line>7000 Fannin Street</addr-line>
        <addr-line>Houston, TX</addr-line>
        <country>United States</country>
        <phone>1 713 500 3981</phone>
        <email>cui.tao@uth.tmc.edu</email>
      </address>  
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-4267-1924</ext-link></contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>School of Biomedical Informatics</institution>
    <institution>The University of Texas Health Science Center at Houston</institution>  
    <addr-line>Houston, TX</addr-line>
    <country>United States</country></aff>
    <aff id="aff2">
    <sup>2</sup>
    <institution>Department of Communication</institution>
    <institution>College of Liberal Arts</institution>  
    <institution>Texas A&#38;M University</institution>  
    <addr-line>College Station, TX</addr-line>
    <country>United States</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Cui Tao 
      <email>cui.tao@uth.tmc.edu</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><month>07</month><year>2018</year></pub-date>
    <pub-date pub-type="epub">
      <day>09</day>
      <month>07</month>
      <year>2018</year>
    </pub-date>
    <volume>20</volume>
    <issue>7</issue>
    <elocation-id>e236</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>14</day>
        <month>11</month>
        <year>2017</year>
      </date>
      <date date-type="rev-request">
        <day>28</day>
        <month>12</month>
        <year>2017</year>
      </date>
      <date date-type="rev-recd">
        <day>1</day>
        <month>4</month>
        <year>2018</year>
      </date>
      <date date-type="accepted">
        <day>10</day>
        <month>5</month>
        <year>2018</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Jingcheng Du, Lu Tang, Yang Xiang, Degui Zhi, Jun Xu, Hsing-Yi Song, Cui Tao. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 09.07.2018.</copyright-statement>
    <copyright-year>2018</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://www.jmir.org/2018/7/e236/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>Timely understanding of public perceptions allows public health agencies to provide up-to-date responses to health crises such as infectious disease outbreaks. Social media such as Twitter provide an unprecedented way for the prompt assessment of the large-scale public response.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>The aims of this study were to develop a scheme for a comprehensive public perception analysis of a measles outbreak based on Twitter data and demonstrate the superiority of the convolutional neural network (CNN) models (compared with conventional machine learning methods) on measles outbreak-related tweets classification tasks with a relatively small and highly unbalanced gold standard training set.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We first designed a comprehensive scheme for the analysis of public perception of measles based on tweets, including 3 dimensions: discussion themes, emotions expressed, and attitude toward vaccination. All 1,154,156 tweets containing the word “measles” posted between December 1, 2014, and April 30, 2015, were purchased and downloaded from DiscoverText.com. Two expert annotators curated a gold standard of 1151 tweets (approximately 0.1% of all tweets) based on the 3-dimensional scheme. Next, a tweet classification system based on the CNN framework was developed. We compared the performance of the CNN models to those of 4 conventional machine learning models and another neural network model. We also compared the impact of different word embedding configurations for the CNN models: (1) Stanford GloVe embedding trained on billions of tweets in the general domain, (2) measles-specific embedding trained on our 1 million measles-related tweets, and (3) a combination of the 2 embeddings.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>Cohen kappa intercoder reliability values for the annotation were: 0.78, 0.72, and 0.80 on the 3 dimensions, respectively. Class distributions within the gold standard were highly unbalanced for all dimensions. The CNN models performed better on all classification tasks than k-nearest neighbors, naïve Bayes, support vector machines, or random forest. Detailed comparison between support vector machines and the CNN models showed that the major contributor to the overall superiority of the CNN models is the improvement on recall, especially for classes with low occurrence. The CNN model with the 2 embedding combination led to better performance on discussion themes and emotions expressed (microaveraging F1 scores of 0.7811 and 0.8592, respectively), while the CNN model with Stanford embedding achieved the best performance on attitude toward vaccination (microaveraging F1 score of 0.8642).</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>The proposed scheme can successfully classify the public’s opinions and emotions in multiple dimensions, which would facilitate the timely understanding of public perceptions during the outbreak of an infectious disease. Compared with conventional machine learning methods, our CNN models showed superiority on measles-related tweet classification tasks with a relatively small and highly unbalanced gold standard. With the success of these tasks, our proposed scheme and CNN-based tweets classification system is expected to be useful for the analysis of tweets about other infectious diseases such as influenza and Ebola.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>convolutional neural networks</kwd>
      <kwd>social media</kwd>
      <kwd>measles</kwd>
      <kwd>public perception</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Nearly 40 million cases of measles, caused by a highly contagious virus, lead to over 300,000 deaths worldwide every year [<xref ref-type="bibr" rid="ref1">1</xref>]. In the United States, measles was officially declared to be eliminated in 2000 thanks to the successful nationwide administration of a 2-dose vaccination program [<xref ref-type="bibr" rid="ref2">2</xref>]. However, recent years have seen the reemergence of measles outbreaks in the United States. The most recent large-scale measles outbreak occurred in early 2015 with a high concentration of cases in California [<xref ref-type="bibr" rid="ref3">3</xref>]. Researchers believe that increasing rates of vaccination refusal and undervaccination have made the public more vulnerable to this potentially deadly disease [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      <p>During an outbreak of an infectious disease such as measles, responsible public health agencies need to send out timely messages to the public during different stages of the crisis [<xref ref-type="bibr" rid="ref5">5</xref>]. For instance, the Centers for Disease Control and Prevention (CDC) has adopted a 5-stage model of crisis and emergency risk communication, including precrisis, initial event, maintenance, resolution, and evaluation [<xref ref-type="bibr" rid="ref5">5</xref>]. Prompt understanding of the public’s perceptions will allow public health agencies to respond to people’s attitudes, emotions, and needs in real time instead of relying on a predetermined timeline based on stages. Using traditional methods such as surveys to study public perceptions during an infectious disease outbreak is both costly and time-consuming [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>Social media have been increasingly used by the general public, patients, and health professionals to communicate about health-related issues [<xref ref-type="bibr" rid="ref7">7</xref>]. Researchers have studied social media content for drug adverse events detection [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], assessment of public opinion about health-related issues such as vaccination [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref13">13</xref>], and infectious disease outbreak surveillance [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Twitter, one of the largest public social media in the world, provides unique insights into how the public responds to an infectious disease outbreak as users, in real time, share information about the outbreak, talk about their personal experiences, argue over the necessity and safety of vaccination, and express a wide range of emotions. Examining Twitter content can provide an immediate assessment of the public’s response and will allow public health professionals to adapt their messages to communicate with the public more effectively.</p>
      <p>Many studies have used Twitter to assess various public health topics. However, most of the studies thus far have focused on analyzing the frequency of postings rather than on understanding post contents [<xref ref-type="bibr" rid="ref16">16</xref>]. There is an increasing need to develop automatic and scalable approaches for the accurate understanding of the high volume of Twitter posts. Recent advances in machine learning and natural language processing (NLP) technologies allow for the stringent analysis of large amounts of Twitter posts. However, compared to texts in other domains, Twitter text has very distinctive characteristics such as very short text, unique Twitter language and structures, etc. For some health-related topics, there also exists the unbalanced class distribution issue (certain classes are much more frequent than other classes), which can further erode the performance of NLP models [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. To improve performance on health-related Twitter datasets, substantial time and effort on feature engineering [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] is needed for conventional machine-learning algorithms, including support vector machines (SVMs), k-nearest neighbors (KNNs), etc.</p>
      <p>Compared to conventional machine learning algorithms, neural network models are advantageous because they save significant time on task-specific feature engineering, achieve higher performance, and are scalable to large applications [<xref ref-type="bibr" rid="ref19">19</xref>]. Some recent works applied neural network models to social media to understand public perceptions and behaviors. For instance, Lima et al [<xref ref-type="bibr" rid="ref20">20</xref>] investigated the use of a multilayer perceptron neural network to classify personality from Twitter. Huynh et al [<xref ref-type="bibr" rid="ref21">21</xref>] and Coco et al [<xref ref-type="bibr" rid="ref22">22</xref>] proposed a deep neural network model to identify adverse drug reactions from Twitter data. Kendra [<xref ref-type="bibr" rid="ref23">23</xref>] used a 5-layer neural network to characterize the discussion about antibiotics on Twitter. Bian et al [<xref ref-type="bibr" rid="ref24">24</xref>] applied a convolutional neural network model to perform sentiment analysis on layperson’s tweets. Zhao et al [<xref ref-type="bibr" rid="ref25">25</xref>] proposed a semisupervised deep learning approach for influenza epidemic simulation. However, to the best of our knowledge, little work has been done to study public perceptions of infectious diseases and vaccinations on Twitter using neural network models.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection</title>
        <p>All tweets including the word “measles” posted between December 1, 2014, and April 30, 2015, were purchased and downloaded from DiscoverText.com. This time frame was chosen because the unidentified Patient Zero of this outbreak visited the Disneyland theme park in California in December 2014. The first few suspected cases of measles were reported on January 5, 2015, and the last case was reported on March 2, 2015. CDC officially declared the outbreak to be over on April 17, 2015 [<xref ref-type="bibr" rid="ref26">26</xref>]. A total of 1,154,156 tweets were collected. The number of tweets collected during the time frame can be seen in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Frequency of measles-related tweets by date and type.</p>
          </caption>
          <graphic xlink:href="jmir_v20i7e236_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Gold Standard Annotation</title>
        <p>In order to understand measles-related contents on Twitter comprehensively, we created an annotation scheme containing 3 dimensions: <italic>discussion themes</italic>, <italic>emotions expressed</italic>, and <italic>attitude toward vaccination</italic>. The coding schemes <italic>discussion themes</italic> and <italic>emotions expressed</italic> were adapted based on Chew and Eysenbach [<xref ref-type="bibr" rid="ref6">6</xref>], while the coding scheme <italic>attitude toward vaccination</italic> was created by the authors inductively. For <italic>discussion themes</italic>, 5 themes were identified: resources (news update about the outbreak, medical information about prevention, treatment, symptoms of measles), personal experience (direct or indirect experiences about measles), personal opinions and interests, questions, and other (unrelated to measles). <italic>Emotions expressed</italic> was categorized into 5 types: humor or sarcasm, positive emotion (relief and downplayed risk), anger, concern, and not applicable. The data collection was based on the keyword measles; however, debate about vaccines emerged in a large percentage of tweets collected. Hence, we took this opportunity to measure how public opinion changed over time during a measles outbreak. <italic>Attitude toward vaccination</italic> was categorized into 3 groups: pro (provaccination), against (antivaccination), and not applicable (no attitude). See <xref ref-type="fig" rid="figure2">Figure 2</xref> for a visual representation of the 3 dimensions and categories within each dimension.</p>
        <p>Two coders manually coded 0.1% of all tweets selected through systematic sampling. The first tweet was identified using a random number generator. After this, every 1000th tweet was selected in the sample. The Cohen kappa intercoder reliability values for the 3 dimensions were 0.78, 0.72, and 0.80, respectively. Afterward, the 2 coders discussed their results to resolve discrepancies.</p>
      </sec>
      <sec>
        <title>Neural Network Classification System</title>
        <sec>
          <title>Data Cleaning</title>
          <p>The vocabulary used on Twitter is very different from the general English vocabulary. User names, URLs, and hashtags need to be normalized. We first replaced tokens written in all capital letters with the lowercase form of the token followed by the string “&#60;ALLCAPS&#62;”. Then all URLs were replaced with the string “&#60;URL&#62;”. Twitter user names (eg, @twitter) were then replaced with the string “&#60;USER&#62;”. All numbers were replaced with the string “&#60;NUMBER&#62;”. All hashtags were separated into tokens by uppercase letters (eg, we replaced “#VaccineWork” with “&#60;HASHTAG&#62; Vaccine Work”). Afterwards, all tweets were converted to lowercase. Our tweet preprocessing procedure was based on the Stanford GloVe tweets preprocessing script [<xref ref-type="bibr" rid="ref27">27</xref>]. An example illustrating the tweet preprocessing step is shown below:</p>
          <p>Raw tweet text: “RT @KTLA: #BREAKING: At least 9 measles cases linked to visits to @Disneyland from Dec. 15-20 http://t.co/1GRlwFhPgv http://t.co/3Nl15jmqAE”</p>
          <p>Cleaned tweet text: “rt &#60;allcaps&#62; &#60;user&#62;: breaking: at least &#60;number&#62; measles cases linked to visits to &#60;user&#62; from dec. &#60;number&#62; &#60;number&#62; &#60;url&#62; &#60;url&#62;”</p>
        </sec>
        <sec>
          <title>Convolutional Neural Networks</title>
          <p>Commonly used in various computer vision tasks [<xref ref-type="bibr" rid="ref28">28</xref>], convolutional neural networks (CNNs) have demonstrated excellent performance in the NLP field, including different text classification tasks [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref32">32</xref>]. We extended the classic CNN framework for sentence classification proposed by Kim [<xref ref-type="bibr" rid="ref29">29</xref>] by using a combination of generic Twitter embedding and target domain Twitter embedding [<xref ref-type="bibr" rid="ref33">33</xref>]. Details of our CNN system architecture can be seen in <xref ref-type="fig" rid="figure3">Figure 3</xref>. We cleaned the tweets following the data cleaning step. Then each token of the tweets was mapped to 2 high-dimension representations through 2 word embeddings: generic tweets embedding and target domain tweets embedding. Both embeddings were fine-tuned during the training process.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Measles tweets annotation scheme for different dimensions.</p>
            </caption>
            <graphic xlink:href="jmir_v20i7e236_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>System architecture for measles-related tweets classification using convolutional neural networks.</p>
            </caption>
            <graphic xlink:href="jmir_v20i7e236_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>We used 3 filters of size 3, 4, and 5 to generate the convolutional layer on each embedding. The feature maps generated by filters from each embedding were concatenated and fed to the pooling layer. We adopted a max-pooling strategy with a dropout rate of 0.5 on the pooling layer. The output layer consisted of different classes for each dimension. This CNN system was built based on the Python and TensorFlow libraries [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
        </sec>
        <sec>
          <title>Tweets Word Vector Embedding</title>
          <p>For generic tweets embedding, we used pretrained GloVe tweets embedding from Stanford. GloVe is an unsupervised learning algorithm developed by Pennington et al [<xref ref-type="bibr" rid="ref35">35</xref>] to obtain vector representations for words. GloVe tweets word vectors were trained on 2 billion tweets and 27 billion tokens [<xref ref-type="bibr" rid="ref35">35</xref>] and have been widely used in different Twitter-related NLP tasks [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. For target domain embedding, we trained a tweets embedding from our own measles-related tweets corpus (1,154,156 tweets) using the same GloVe algorithm. We tested different numbers of embedding dimensions in our preexperiments. The tweets word embedding in dimension 200 achieved the best performance for our tasks.</p>
        </sec>
      </sec>
      <sec>
        <title>Experiments</title>
        <p>For the CNN-based framework, we performed the following experiments: (1) use of pretrained GloVe tweets embedding only, (2) use of tweets measles embedding only, and (3) use of a combination of the pretrained GloVe tweets embedding and measles tweets embedding. For the use of 1 embedding only, we just used 1 channel of the proposed framework. We chose 4 popular machine learning models for comparison as our baselines: KNN [<xref ref-type="bibr" rid="ref38">38</xref>], naïve Bayes [<xref ref-type="bibr" rid="ref39">39</xref>], SVM [<xref ref-type="bibr" rid="ref40">40</xref>], and random forest [<xref ref-type="bibr" rid="ref41">41</xref>]. For SVM, a radial basis function kernel was used. We followed the same tweet cleaning steps and extracted n-grams as the feature for these traditional machine learning models. The Waikato Environment for Knowledge Analysis library was used to train and test these models [<xref ref-type="bibr" rid="ref42">42</xref>]. We also evaluated the bidirectional long short-term memory (Bi-LSTM), which has achieved state-of-the-art performance in many classification and sequence labeling tasks [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>], for tweets classifications. The input of the Bi-LSTM is the pretrained GloVe tweets embedding (dimension: 200). We conducted these experiments on all 3 dimensions for public perceptions on measles.</p>
      </sec>
      <sec>
        <title>System Evaluation</title>
        <p>We leveraged a 10-fold cross-validation to evaluate the performances of these models for each classification task. Standard metrics including precision, recall, and F1 score were calculated for each class. We also calculated the microaveraging F score and macroaveraging F score to evaluate their performance on each classification task. For microaveraged score, we summed up all the individual true positives, false positives, and false negatives. For macroaveraged score, we took the average of the F1 score of different categories.</p>
      </sec>
      <sec>
        <title>Ethical Approval</title>
        <p>This study received institutional review board approval from the Committee for the Protection of Human Subjects at the University of Texas Health Science Center at Houston. The reference number is HSC-SBMI-16-0291.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Gold Standard Description</title>
        <p>In total, 1151 tweets were annotated. Class distributions were highly unbalanced for all 3 tasks (<xref ref-type="table" rid="table1">Table 1</xref>). In terms of <italic>discussion themes</italic>, nearly two-thirds (718/1151, 62.38%) of tweets were categorized as resources (ie, outbreak update or medical information about measles). Less than one-third (344/1151, 29.89%) of the tweets were about users’ personal opinions and interests. Only 1.82% (21/1151) of the tweets discussed personal experience with measles, and 1.73% (20/1151) asked questions. For <italic>emotions expressed</italic>, 79.84% (919/1151) of tweets were categorized as expressing concern. Humor or sarcasm was found in 9.47% (109/1151) of the tweets. Positive emotion and anger were found in 3.38% (39/1151) and 3.04% (35/1151) of the tweets, respectively. Finally, in terms of <italic>attitude toward vaccination</italic>, the majority of the tweets (913/1151, 79.32%) did not express any opinion about vaccination, 17.55% (202/1151) of tweets were provaccination and 3.13% (36/1151) were antivaccination.</p>
      </sec>
      <sec>
        <title>Overall Comparison of Convolutional Neural Network Models With Conventional Models</title>
        <p>Comparison of the performances of CNN models and 4 machine learning models on the 3 dimensions can be seen in <xref ref-type="table" rid="table2">Table 2</xref>. As shown, CNN-based models have better performance than other conventional machine learning models or the Bi-LSTM model. The CNN model with the combination of 2 embeddings achieved the best performance on <italic>emotions expressed</italic> and the highest macroaveraging F score on <italic>discussion themes</italic>. The CNN model with Stanford embedding had the highest microaveraging F score on <italic>discussion themes</italic> and achieved the best performance on <italic>attitude toward vaccination</italic>. The CNN with measles embedding achieved a relatively high microaveraging F score on <italic>emotions expressed</italic> and <italic>attitude toward vaccination</italic>. The Bi-LSTM model had the worst performance among neural network models, probably due to the limited size of training data.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Class distribution in the gold standard for 3 dimensions.</p>
          </caption>
          <table width="1000" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="690"/>
            <col width="0"/>
            <col width="280"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Dimension and class</td>
                <td>Tweets, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3"><bold>Discussion themes</bold></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Resource</td>
                <td colspan="2">718 (62.4)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Personal experience</td>
                <td colspan="2">21 (1.8)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Personal opinions and interest</td>
                <td colspan="2">344 (29.9)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Question</td>
                <td colspan="2">20 (1.7)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Other</td>
                <td colspan="2">48 (4.2)</td>
              </tr>
              <tr valign="top">
                <td colspan="3"><bold>Emotions expressed</bold></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Humor or sarcasm</td>
                <td colspan="2">109 (9.5)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Positive emotion</td>
                <td colspan="2">39 (3.4)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Anger</td>
                <td colspan="2">35 (3.0)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Concern</td>
                <td colspan="2">919 (79.8)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Not applicable</td>
                <td colspan="2">49 (4.3)</td>
              </tr>
              <tr valign="top">
                <td colspan="3"><bold>Attitude toward vaccination</bold></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Pro</td>
                <td colspan="2">202 (17.6)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Against</td>
                <td colspan="2">36 (3.1)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Not applicable</td>
                <td colspan="2">913 (79.3)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Ten-fold cross-validation results of neural network models and 4 conventional machine learning models on 3 dimensions. Italics indicate best performance in that class.</p>
          </caption>
          <table width="1000" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="130"/>
            <col width="150"/>
            <col width="150"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td colspan="3">Microaveraging F score</td>
                <td colspan="3">Macroaveraging F score</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Discussion themes</td>
                <td>Emotions expressed</td>
                <td>Attitude toward vaccination</td>
                <td>Discussion themes</td>
                <td>Emotions expressed</td>
                <td>Attitude toward vaccination</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="bottom">
                <td>KNN<sup>a</sup></td>
                <td>0.5143</td>
                <td>0.6977</td>
                <td>0.8129</td>
                <td>0.3223</td>
                <td>0.4074</td>
                <td>0.5114</td>
              </tr>
              <tr valign="bottom">
                <td>Naïve Bayes</td>
                <td>0.6811</td>
                <td>0.7767</td>
                <td>0.7171</td>
                <td>0.4101</td>
                <td>0.4814</td>
                <td>0.5343</td>
              </tr>
              <tr valign="bottom">
                <td>Random forest</td>
                <td>0.7350</td>
                <td>0.8393</td>
                <td>0.8085</td>
                <td>0.4243</td>
                <td>0.4393</td>
                <td>0.5356</td>
              </tr>
              <tr valign="bottom">
                <td>SVM<sup>b</sup></td>
                <td>0.7696</td>
                <td>0.8365</td>
                <td>0.8211</td>
                <td>0.3917</td>
                <td>0.4269</td>
                <td>0.5345</td>
              </tr>
              <tr valign="bottom">
                <td>Bi-LSTM<sup>c</sup></td>
                <td>0.7315</td>
                <td>0.8271</td>
                <td>0.7958</td>
                <td>0.2899</td>
                <td>0.3730</td>
                <td>0.4358</td>
              </tr>
              <tr valign="bottom">
                <td>CNN_M<sup>d</sup></td>
                <td>0.7533</td>
                <td>0.8480</td>
                <td>0.8355</td>
                <td>0.4282</td>
                <td>0.4849</td>
                <td>0.5871</td>
              </tr>
              <tr valign="bottom">
                <td>CNN_S<sup>e</sup></td>
                <td><italic>0.7897</italic></td>
                <td>0.8575</td>
                <td><italic>0.8642</italic></td>
                <td>0.4158</td>
                <td>0.5419</td>
                <td><italic>0.6629</italic></td>
              </tr>
              <tr valign="bottom">
                <td>CNN_M+S<sup>f</sup></td>
                <td>0.7811</td>
                <td><italic>0.8592</italic></td>
                <td>0.8254</td>
                <td><italic>0.4611</italic></td>
                <td><italic>0.5591</italic></td>
                <td>0.6078</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>KNN: k-nearest neighbor.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>SVM: support vector machines.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>Bi-LSTM: bidirectional long short-term memory.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>CNN_M: convolutional neural network using the measles tweets embedding.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>CNN_S: convolutional neural network using the pretrained GloVe tweets embedding from Stanford.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>CNN_M+S: convolutional neural network using the combination of pretrained GloVe tweets embedding and measles tweets embedding.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>As shown in <xref ref-type="table" rid="table2">Table 2</xref>, among the conventional machine learning models, SVM generally performed the best on all 3 dimensions. In order to further compare the performances of CNN models on each class and try to improve the overall performance, we then calculated and compared the precision, recall, and F score of SVM, the CNN model with Stanford GloVe tweets embedding only, and the CNN model with the combination of generic and target domain embedding.</p>
      </sec>
      <sec>
        <title>Detailed Comparison of Convolutional Neural Network Models With Support Vector Machines on 3 Dimensions</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the comparison of SVM and CNN models on <italic>discussion themes</italic>. For precision score, the CNN with GloVe tweets embedding achieved better performance on classes with larger numbers of tweets (resources and personal opinions and interest). The CNN with the combination of 2 embeddings achieved better performance on classes with very limited numbers of tweets (ie, questions). For recall score, the CNN model with either Stanford embedding or the combination of 2 embeddings greatly improved the recall of the classes with relatively fewer tweets such as personal opinions and interests and questions, while SVM had slightly better performance on resources. The improvement of recall score greatly contributed to the improvement on the F score. Unfortunately, for the class personal experience, none of the models could identify any tweets correctly.</p>
        <p>The comparison of SVM and the CNN models on <italic>emotions expressed</italic> can be seen in <xref ref-type="table" rid="table4">Table 4</xref>. CNN models achieved higher precision scores on classes with fewer cases, including anger and not applicable, while SVM performed better on humor or sarcasm. For recall and F1 score, CNN models with either Stanford embedding or the combination of 2 embeddings performed well on all classes. In general, the CNN with the combination of 2 embeddings had better performance for more categories than the CNN with Stanford embedding only.</p>
        <p>For dimension 3, <italic>attitude toward vaccination</italic>, the overall comparison between the CNN models and SVM can be seen in <xref ref-type="table" rid="table5">Table 5</xref>. Both CNN models outperformed SVM in most of the categories, and the CNN model with Stanford embedding achieved better performance in most of the categories. Specifically, for precision score, SVM performed better on class pro, while the CNN models did better on class against and not applicable. The CNN with the combination of 2 embeddings achieved the highest precision score on against. In terms of recall, the CNN models performed much better on the classes with very small numbers of tweets (ie, pro and against), while SVM did better on the class not applicable. As for F1 score, the CNN with Stanford embedding performed the best, and SVM performed the worst on all 3 classes.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Detailed precision, recall, and F score of each class for <italic>discussion themes</italic>. Italics indicate best performance in that class.</p>
          </caption>
          <table width="1000" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="0"/>
            <col width="80"/>
            <col width="100"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>Class</td>
                <td colspan="3">Precision</td>
                <td colspan="3">Recall</td>
                <td colspan="4">F1 score</td>
              </tr>
              <tr valign="bottom">
                <td><break/></td>
                <td>SVM<sup>a</sup></td>
                <td>CNN_M+S<sup>b</sup></td>
                <td>CNN_S<sup>c</sup></td>
                <td>SVM</td>
                <td>CNN_M+S</td>
                <td colspan="2">CNN_S</td>
                <td>SVM</td>
                <td>CNN_M+S</td>
                <td>CNN_S</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="bottom">
                <td>Resource (n=718)</td>
                <td>0.7907</td>
                <td>0.8119</td>
                <td><italic>0.8172</italic></td>
                <td><italic>0.9471</italic></td>
                <td>0.9318</td>
                <td colspan="2">0.9401</td>
                <td>0.8619</td>
                <td>0.8677</td>
                <td><italic>0.8744</italic></td>
              </tr>
              <tr valign="bottom">
                <td>Personal experience (n=21)</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td colspan="2">0</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
              </tr>
              <tr valign="bottom">
                <td>Personal opinions and interest (n=344)</td>
                <td>0.7021</td>
                <td>0.6984</td>
                <td><italic>0.7231</italic></td>
                <td>0.5773</td>
                <td>0.6192</td>
                <td colspan="2"><italic>0.6453</italic></td>
                <td>0.6336</td>
                <td>0.6564</td>
                <td><italic>0.6820</italic></td>
              </tr>
              <tr valign="bottom">
                <td>Question (n=20)</td>
                <td>0</td>
                <td>0.5</td>
                <td>0</td>
                <td>0</td>
                <td>0.0500</td>
                <td colspan="2">0</td>
                <td>0</td>
                <td><italic>0.0909</italic></td>
                <td>0</td>
              </tr>
              <tr valign="bottom">
                <td>Other (n=48)</td>
                <td>0.8750</td>
                <td>0.8421</td>
                <td><italic>0.8571</italic></td>
                <td>0.1458</td>
                <td><italic>0.3333</italic></td>
                <td colspan="2">0.2500</td>
                <td>0.2500</td>
                <td><italic>0.4776</italic></td>
                <td>0.3871</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>SVM: support vector machines.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>CNN_M+S: convolutional neural network using the combination of pretrained GloVe tweets embedding and measles tweets embedding.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>CNN_S: convolutional neural network using the pretrained GloVe tweets embedding from Stanford.</p>
            </fn>
    
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Detailed precision, recall, and F score of each class for <italic>emotions expressed</italic>. Italics indicate best performance in that class.</p>
          </caption>
          <table width="1000" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="0"/>
            <col width="80"/>
            <col width="0"/>
            <col width="100"/>
            <col width="100"/>
            <col width="0"/>
            <col width="80"/>
            <col width="100"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>Class</td>
                <td colspan="3">Precision</td>
                <td colspan="5">Recall</td>
                <td colspan="4">F1 score</td>
              </tr>
              <tr valign="bottom">
                <td><break/></td>
                <td>SVM<sup>a</sup></td>
                <td>CNN_M+S<sup>b</sup></td>
                <td colspan="2">CNN_S<sup>c</sup></td>
                <td colspan="2">SVM</td>
                <td>CNN_M+S</td>
                <td colspan="2">CNN_S</td>
                <td>SVM</td>
                <td>CNN_M+S</td>
                <td>CNN_S</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="bottom">
                <td>Humor or sarcasm (n=109)</td>
                <td><italic>1</italic></td>
                <td>0.9388</td>
                <td colspan="2">0.8909</td>
                <td>0.3486</td>
                <td colspan="2">0.4220</td>
                <td colspan="2"><italic>0.4495</italic></td>
                <td>0.5170</td>
                <td>0.5823</td>
                <td><italic>0.5976</italic></td>
              </tr>
              <tr valign="bottom">
                <td>Positive emotion (n=39)</td>
                <td><italic>1</italic></td>
                <td><italic>1</italic></td>
                <td colspan="2"><italic>1</italic></td>
                <td>0.0513</td>
                <td colspan="2"><italic>0.1538</italic></td>
                <td colspan="2">0.1282</td>
                <td>0.0967</td>
                <td><italic>0.2667</italic></td>
                <td>0.2273</td>
              </tr>
              <tr valign="bottom">
                <td>Anger (n=35)</td>
                <td>0</td>
                <td><italic>1</italic></td>
                <td colspan="2">0.6667</td>
                <td>0</td>
                <td colspan="2">0.0286</td>
                <td colspan="2"><italic>0.0571</italic></td>
                <td>0</td>
                <td>0.0556</td>
                <td><italic>0.1053</italic></td>
              </tr>
              <tr valign="bottom">
                <td>Concern (n=919)</td>
                <td>0.8312</td>
                <td>0.8538</td>
                <td colspan="2"><italic>0.8550</italic></td>
                <td>0.9069</td>
                <td colspan="2"><italic>0.9978</italic></td>
                <td colspan="2">0.9946</td>
                <td>0.9069</td>
                <td><italic>0.9202</italic></td>
                <td>0.9195</td>
              </tr>
              <tr valign="bottom">
                <td>Not applicable (n=49)</td>
                <td>0.7500</td>
                <td><italic>0.9048</italic></td>
                <td colspan="2">0.8947</td>
                <td>0.2105</td>
                <td colspan="2"><italic>0.3878</italic></td>
                <td colspan="2">0.3469</td>
                <td>0.2105</td>
                <td><italic>0.5429</italic></td>
                <td>0.5000</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>SVM: support vector machines.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>CNN_M+S: convolutional neural network using the combination of pretrained GloVe tweets embedding and measles tweets embedding.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>CNN_S: convolutional neural network using the pretrained GloVe tweets embedding from Stanford.</p>
            </fn>
   
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Detailed precision, recall, and F score of each class for <italic>attitude toward vaccination</italic>. Italics indicate best performance in that class.</p>
          </caption>
          <table width="1000" cellpadding="8" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="0"/>
            <col width="80"/>
            <col width="100"/>
            <col width="100"/>
            <col width="0"/>
            <col width="80"/>
            <col width="100"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>Class</td>
                <td colspan="3">Precision</td>
                <td colspan="4">Recall</td>
                <td colspan="4">F1 score</td>
              </tr>
              <tr valign="bottom">
                <td><break/></td>
                <td>SVM<sup>a</sup></td>
                <td>CNN_M+S<sup>b</sup></td>
                <td colspan="2">CNN_S<sup>c</sup></td>
                <td>SVM</td>
                <td>CNN_M+S</td>
                <td colspan="2">CNN_S</td>
                <td>SVM</td>
                <td>CNN_M+S</td>
                <td>CNN_S</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="bottom">
                <td>Pro (n=202)</td>
                <td><italic>0.7917</italic></td>
                <td>0.6458</td>
                <td colspan="2">0.7554</td>
                <td>0.1919</td>
                <td>0.3069</td>
                <td colspan="2"><italic>0.5198</italic></td>
                <td>0.3089</td>
                <td>0.4161</td>
                <td><italic>0.6158</italic></td>
              </tr>
              <tr valign="bottom">
                <td>Against (n=36)</td>
                <td>0.6667</td>
                <td><italic>1</italic></td>
                <td colspan="2">0.8571</td>
                <td>0.0556</td>
                <td><italic>0.1667</italic></td>
                <td colspan="2"><italic>0.1667</italic></td>
                <td>0.1026</td>
                <td><italic>0.2857</italic></td>
                <td>0.2791</td>
              </tr>
              <tr valign="bottom">
                <td>Not applicable (n=913)</td>
                <td>0.8228</td>
                <td>0.8408</td>
                <td colspan="2"><italic>0.8794</italic></td>
                <td><italic>0.9890</italic></td>
                <td>0.9660</td>
                <td colspan="2">0.9682</td>
                <td>0.8982</td>
                <td>0.8991</td>
                <td><italic>0.9216</italic></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>SVM: support vector machines.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>CNN_M+S: convolutional neural network using the combination of pretrained GloVe tweets embedding and measles tweets embedding.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>CNN_S: convolutional neural network using the pretrained GloVe tweets embedding from Stanford.</p>
            </fn>
       
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Contributions</title>
        <p>This study makes 2 primary contributions. First, we designed and implemented a comprehensive scheme for the public perception analysis of measles-related tweets, including <italic>discussion themes</italic>, <italic>emotions expressed</italic>, and <italic>attitude toward vaccination</italic>. We manually curated a gold standard set that contains 1151 tweets annotated according to the scheme. The tweets were sampled from all measles-related tweets during the most recent measles outbreak in the United States in 2015. Based on the annotation results, we believe the scheme can successfully classify the public’s opinions and emotions. Second, we designed and implemented CNN models on the classification tasks of measles-related tweets and investigated their performance compared to traditional machine learning models through a comprehensive comparison on the small-scale tweets corpus with highly unbalanced class distribution.</p></sec>
        <sec>
        <title>Principal Findings</title>
        <p>In classifying measles-related tweets in terms of <italic>discussion themes</italic>, <italic>emotions expressed</italic>, and <italic>attitude toward vaccination</italic>, different classifiers were better suited for different tasks. However, the CNN models achieved better overall performance on all 3 tasks compared to conventional machine learning algorithms. A detailed comparison of the CNN models and SVM showed that the CNN models were able to improve performance on nearly all classes for all 3 dimensions. The major contributor to the overall performance boost is the improvement on recall, especially for the classes with fewer cases than average. The CNN model with the combination of 2 embeddings led to better performance on <italic>discussion themes</italic> and <italic>emotions expressed</italic>, while the CNN model with Stanford embedding achieved the best performance on <italic>attitude toward vaccination</italic>. A common obstacle of deep neural network-based models is the need for a large training dataset. However, for a disease-related tweets classification task like ours, the results show that CNN models can perform better than conventional machine learning models even on a training dataset with only 1151 labeled tweets.</p></sec>
        
        <sec>
        <title>Limitations and Future Directions</title>
        <p>Although the CNN models can greatly increase the performance for most of the classes with few cases, for some minor classes with extremely low numbers of cases such as personal experience in <italic>discussion themes</italic>, the CNN models are just as powerless as conventional models. Further examination of the prediction results shows that many tweets in the minor classes were incorrectly classified into major classes. For example, the tweets in personal experience were classified as either resources or personal opinions and interest. For against in <italic>attitude toward vaccination</italic>, the majority of the tweets were classified as not applicable, which accounts for 79% of the labeled data. The highly unbalanced class distribution is a major challenge for both conventional machine learning methods and neural network methods. Since the current gold standard training set is relatively small, we plan to collect and annotate more related tweets (especially the tweets belonging to smaller classes) to build a larger labeled dataset. We believe performance could be improved by using a larger labeled training dataset.</p>
        <p>Future research could take a few directions. Additional hyperparameter tuning (ie, activation function selection, pooling strategies) can also improve the performance on the disease-related tweets classification tasks. In addition, although the Bi-LSTM model doesn’t work well on our tasks (probably due to the limited training data size), other recurrent neural network-based frameworks such as attentive Bi-LSTM [<xref ref-type="bibr" rid="ref45">45</xref>] may lead to better performance, especially as the size of the training data increases. The improved models can be used to automatically predict the labels of the measles tweets, which will facilitate the analysis of large-scale public perceptions about measles as well as other infectious diseases. Some unsupervised machine learning methods can also be used to explore the major discussion topics from the measles-related tweets dataset, such as topic modeling methods [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], as they can save the effort of annotation.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Timely understanding of public perceptions during the outbreak of an infectious disease such as measles will allow public health agencies to adapt their messages to address the needs, concerns, and emotions of the public. In order to understand the contents of Twitter text regarding measles and vaccination, we designed a classification scheme that contains <italic>discussion themes</italic>, <italic>emotions expressed</italic>, and <italic>attitude toward vaccination</italic> for measles-related tweets. A gold standard containing 1151 tweets was collected and manually annotated according to the classification scheme. CNN models have been evaluated to classify tweets into different classes for different tasks. A comparative study was done to evaluate the performance of CNN models in comparison to 4 conventional machine learning models as well as a Bi-LSTM model. The CNN models had improved performance on classification of themes, emotions, and attitude from the highly unbalanced measles-related tweets dataset. The CNN models presented in the paper can be applied on large-scale tweets datasets. Our proposed scheme and CNN-based tweets classification system for the public perception analysis on Twitter toward measles disease can be used for other infectious diseases such as influenza and Ebola.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">Bi-LSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNN</term>
          <def>
            <p>convolutional neural networks</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">KNN</term>
          <def>
            <p>k-nearest neighbors</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">SVM</term>
          <def>
            <p>support vector machines</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was partially supported by the National Library of Medicine of the National Institutes of Health under award number R01LM011829, the National Institute of Allergy and Infectious Diseases of the National Institutes of Health under award number R01AI130460, and the UTHealth Innovation for Cancer Prevention Research Training Program Pre-Doctoral Fellowship (Cancer Prevention and Research Institute of Texas grant #RP160015). This study was also partially supported by a University of Alabama System’s Collaborative Grant.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Griffin</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Oldstone</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <source>Measles: History and Basic Biology</source>  
        <year>2008</year>  
        <publisher-loc>Berlin</publisher-loc>
        <publisher-name>Springer Science &#38; Business Media</publisher-name></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Katz</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hinman</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Summary and conclusions: measles elimination meeting, 16-17 March 2000</article-title>
        <source>J Infect Dis</source>  
        <year>2004</year>  
        <month>05</month>  
        <day>01</day>  
        <volume>189 Suppl 1</volume>  
        <fpage>S43</fpage>  
        <lpage>S47</lpage>  
        <pub-id pub-id-type="doi">10.1086/377696</pub-id>
        <pub-id pub-id-type="medline">15106088</pub-id>
        <pub-id pub-id-type="pii">JID20790</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zipprich</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Winter</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Hacker</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Xia</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Watt</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Harriman</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Measles outbreak—California, December 2014-February 2015</article-title>
        <source>MMWR Morb Mortal Wkly Rep</source>  
        <year>2015</year>  
        <volume>64</volume>  
        <issue>6</issue>  
        <fpage>154</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.cdc.gov/mmwr/preview/mmwrhtml/mm6406a5.htm"/>
        </comment></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dredze</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Broniatowski</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Hilyard</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Understanding vaccine refusal: why we need social media now</article-title>
        <source>Am J Prev Med</source>  
        <year>2016</year>  
        <month>04</month>  
        <volume>50</volume>  
        <issue>4</issue>  
        <fpage>550</fpage>  
        <lpage>552</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26655067"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.amepre.2015.10.002</pub-id>
        <pub-id pub-id-type="medline">26655067</pub-id>
        <pub-id pub-id-type="pii">S0749-3797(15)00640-6</pub-id>
        <pub-id pub-id-type="pmcid">PMC4801675</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Reynolds</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Seeger</surname>
            <given-names>MW</given-names>
          </name>
        </person-group>
        <article-title>Crisis and emergency risk communication as an integrative model</article-title>
        <source>J Health Commun</source>  
        <year>2005</year>  
        <volume>10</volume>  
        <issue>1</issue>  
        <fpage>43</fpage>  
        <lpage>55</lpage>  
        <pub-id pub-id-type="doi">10.1080/10810730590904571</pub-id>
        <pub-id pub-id-type="medline">15764443</pub-id>
        <pub-id pub-id-type="pii">QJC5WKY16JXPDUEB</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chew</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Eysenbach</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Pandemics in the age of Twitter: content analysis of Tweets during the 2009 H1N1 outbreak</article-title>
        <source>PLoS One</source>  
        <year>2010</year>  
        <month>11</month>  
        <day>29</day>  
        <volume>5</volume>  
        <issue>11</issue>  
        <fpage>e14118</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0014118"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0014118</pub-id>
        <pub-id pub-id-type="medline">21124761</pub-id>
        <pub-id pub-id-type="pmcid">PMC2993925</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Moorhead</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Hazlett</surname>
            <given-names>DE</given-names>
          </name>
          <name name-style="western">
            <surname>Harrison</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Carroll</surname>
            <given-names>JK</given-names>
          </name>
          <name name-style="western">
            <surname>Irwin</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hoving</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>A new dimension of health care: systematic review of the uses, benefits, and limitations of social media for health communication</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <volume>15</volume>  
        <issue>4</issue>  
        <fpage>e85</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2013/4/e85/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.1933</pub-id>
        <pub-id pub-id-type="medline">23615206</pub-id>
        <pub-id pub-id-type="pii">v15i4e85</pub-id>
        <pub-id pub-id-type="pmcid">PMC3636326</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yates</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Goharian</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>ADRTrace: detecting expected and unexpected adverse drug reactions from user reviews on social media sites</article-title>
        <year>2013</year>  
        <conf-name>Eur Conf Inf Retr</conf-name>
        <conf-date>2013</conf-date>
        <conf-loc>Moscow</conf-loc>
        <fpage>816</fpage>  
        <lpage>819</lpage> </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Freifeld</surname>
            <given-names>CC</given-names>
          </name>
          <name name-style="western">
            <surname>Brownstein</surname>
            <given-names>JS</given-names>
          </name>
          <name name-style="western">
            <surname>Menone</surname>
            <given-names>CM</given-names>
          </name>
          <name name-style="western">
            <surname>Bao</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Filice</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Kass-Hout</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Dasgupta</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Digital drug safety surveillance: monitoring pharmaceutical products in twitter</article-title>
        <source>Drug Saf</source>  
        <year>2014</year>  
        <month>05</month>  
        <volume>37</volume>  
        <issue>5</issue>  
        <fpage>343</fpage>  
        <lpage>350</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24777653"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1007/s40264-014-0155-x</pub-id>
        <pub-id pub-id-type="medline">24777653</pub-id>
        <pub-id pub-id-type="pmcid">PMC4013443</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Du</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Song</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Tao</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Optimization on machine learning based approaches for sentiment analysis on HPV vaccines related tweets</article-title>
        <source>J Biomed Semantics</source>  
        <year>2017</year>  
        <month>03</month>  
        <day>03</day>  
        <volume>8</volume>  
        <issue>1</issue>  
        <fpage>9</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0120-6"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s13326-017-0120-6</pub-id>
        <pub-id pub-id-type="medline">28253919</pub-id>
        <pub-id pub-id-type="pii">10.1186/s13326-017-0120-6</pub-id>
        <pub-id pub-id-type="pmcid">PMC5335787</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhou</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Coiera</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Tsafnat</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Arachi</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Ong</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Dunn</surname>
            <given-names>AG</given-names>
          </name>
        </person-group>
        <article-title>Using social connection information to improve opinion mining: identifying negative sentiment about HPV vaccines on Twitter</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2015</year>  
        <volume>216</volume>  
        <fpage>761</fpage>  
        <lpage>765</lpage>  
        <pub-id pub-id-type="medline">26262154</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Salathé</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Khandelwal</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Assessing vaccination sentiments with online social media: implications for infectious disease dynamics and control</article-title>
        <source>PLoS Comput Biol</source>  
        <year>2011</year>  
        <month>10</month>  
        <volume>7</volume>  
        <issue>10</issue>  
        <fpage>e1002199</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pcbi.1002199"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002199</pub-id>
        <pub-id pub-id-type="medline">22022249</pub-id>
        <pub-id pub-id-type="pii">PCOMPBIOL-D-11-00652</pub-id>
        <pub-id pub-id-type="pmcid">PMC3192813</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Du</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Song</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Tao</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Leveraging machine learning-based approaches to assess human papillomavirus vaccination sentiment trends with Twitter data</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2017</year>  
        <month>07</month>  
        <day>05</day>  
        <volume>17</volume>  
        <issue>Suppl 2</issue>  
        <fpage>69</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0469-6"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s12911-017-0469-6</pub-id>
        <pub-id pub-id-type="medline">28699569</pub-id>
        <pub-id pub-id-type="pii">10.1186/s12911-017-0469-6</pub-id>
        <pub-id pub-id-type="pmcid">PMC5506590</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Culotta</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Towards detecting influenza epidemics by analyzing Twitter messages</article-title>
        <year>2010</year>  
        <conf-name>Proc First Work Soc Media Anal</conf-name>
        <conf-date>2010</conf-date>
        <conf-loc>Washington</conf-loc>
        <fpage>115</fpage>  
        <lpage>122</lpage> </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Schmidt</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Trending now: using social media to predict and track disease outbreaks</article-title>
        <source>Environ Health Perspect</source>  
        <year>2012</year>  
        <month>01</month>  
        <volume>120</volume>  
        <issue>1</issue>  
        <fpage>A30</fpage>  
        <lpage>A33</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1289/ehp.120-a30"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1289/ehp.120-a30</pub-id>
        <pub-id pub-id-type="medline">22214548</pub-id>
        <pub-id pub-id-type="pmcid">PMC3261963</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Myneni</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Fujimoto</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Cobb</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Cohen</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Content-driven analysis of an online community for smoking cessation: integration of qualitative techniques, automated text analysis, and affiliation networks</article-title>
        <source>Am J Public Health</source>  
        <year>2015</year>  
        <month>06</month>  
        <volume>105</volume>  
        <issue>6</issue>  
        <fpage>1206</fpage>  
        <lpage>1212</lpage>  
        <pub-id pub-id-type="doi">10.2105/AJPH.2014.302464</pub-id>
        <pub-id pub-id-type="medline">25880942</pub-id>
        <pub-id pub-id-type="pmcid">PMC4431114</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lopez</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kalita</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Deep learning applied to NLP</article-title>
        <source>arXiv Prepr arXiv:1703.03091</source>  
        <year>2017</year>  
        <fpage>1</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/abs/1703.03091"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Young</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Hazarika</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Poria</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Cambria</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Recent trends in deep learning based natural language processing</article-title>
        <source>arXiv Prepr arXiv:1708.02709</source>  
        <year>2017</year>  
        <fpage>1</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/abs/1708.02709">https://arxiv.org/abs/1708.02709</ext-link>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Goodfellow</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Bengio</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Courville</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <source>Deep Learning</source>  
        <year>2016</year>  
        <publisher-loc>Cambridge</publisher-loc>
        <publisher-name>MIT Press</publisher-name></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lima</surname>
            <given-names>ACES</given-names>
          </name>
          <name name-style="western">
            <surname>de Castro</surname>
            <given-names>LN</given-names>
          </name>
        </person-group>
        <article-title>A multi-label, semi-supervised classification approach applied to personality prediction in social media</article-title>
        <source>Neural Netw</source>  
        <year>2014</year>  
        <month>10</month>  
        <volume>58</volume>  
        <fpage>122</fpage>  
        <lpage>130</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.neunet.2014.05.020</pub-id>
        <pub-id pub-id-type="medline">24969690</pub-id>
        <pub-id pub-id-type="pii">S0893-6080(14)00128-2</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Huynh</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>He</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Willis</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Rüger</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Adverse drug reaction classification with deep neural networks</article-title>
        <source>Proc Coling Tech Papers</source>  
        <year>2016</year>  
        <fpage>877</fpage>  
        <lpage>887</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/C16-1084"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cocos</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Fiks</surname>
            <given-names>AG</given-names>
          </name>
          <name name-style="western">
            <surname>Masino</surname>
            <given-names>AJ</given-names>
          </name>
        </person-group>
        <article-title>Deep learning for pharmacovigilance: recurrent neural network architectures for labeling adverse drug reactions in Twitter posts</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2017</year>  
        <month>07</month>  
        <day>01</day>  
        <volume>24</volume>  
        <issue>4</issue>  
        <fpage>813</fpage>  
        <lpage>821</lpage>  
        <pub-id pub-id-type="doi">10.1093/jamia/ocw180</pub-id>
        <pub-id pub-id-type="medline">28339747</pub-id>
        <pub-id pub-id-type="pii">3041102</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kendra</surname>
            <given-names>RL</given-names>
          </name>
          <name name-style="western">
            <surname>Karki</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Eickholt</surname>
            <given-names>JL</given-names>
          </name>
          <name name-style="western">
            <surname>Gandy</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Characterizing the discussion of antibiotics in the Twittersphere: what is the bigger picture?</article-title>
        <source>J Med Internet Res</source>  
        <year>2015</year>  
        <volume>17</volume>  
        <issue>6</issue>  
        <fpage>e154</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2015/6/e154/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.4220</pub-id>
        <pub-id pub-id-type="medline">26091775</pub-id>
        <pub-id pub-id-type="pii">v17i6e154</pub-id>
        <pub-id pub-id-type="pmcid">PMC4526952</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bian</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Salloum</surname>
            <given-names>RG</given-names>
          </name>
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Prosperi</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Du</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Ramirez-Diaz</surname>
            <given-names>LJ</given-names>
          </name>
          <name name-style="western">
            <surname>He</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Using social media data to understand the impact of promotional information on laypeople's discussions: a case study of Lynch syndrome</article-title>
        <source>J Med Internet Res</source>  
        <year>2017</year>  
        <month>12</month>  
        <day>13</day>  
        <volume>19</volume>  
        <issue>12</issue>  
        <fpage>e414</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2017/12/e414/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.9266</pub-id>
        <pub-id pub-id-type="medline">29237586</pub-id>
        <pub-id pub-id-type="pii">v19i12e414</pub-id>
        <pub-id pub-id-type="pmcid">PMC5745354</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Ramakrishnan</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>SimNest: social media nested epidemic simulation via online semi-supervised deep learning</article-title>
        <source>Proc IEEE Int Conf Data Min</source>  
        <year>2015</year>  
        <month>11</month>  
        <fpage>639</fpage>  
        <lpage>648</lpage>  
        <pub-id pub-id-type="doi">10.1109/ICDM.2015.39</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
        <source>California Department of Public Health</source>  
        <year>2015</year>  
        <month>04</month>  
        <day>17</day>  
        <access-date>2018-04-02</access-date>
        <comment>California measles surveillance update 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdph.ca.gov/Programs/CID/DCDC/CDPH%20Document%20Library/Immunization/IMM-MeaslesUpdate2015-04-17.pdf">https://www.cdph.ca.gov/Programs/CID/DCDC/CDPH%20Document%20Library/Immunization/IMM-MeaslesUpdate2015-04-17.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6yMpyCvVq"/></comment> </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
        <source>Script for preprocessing tweets</source>  
        <access-date>2018-04-02</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://nlp.stanford.edu/projects/glove/preprocess-twitter.rb">https://nlp.stanford.edu/projects/glove/preprocess-twitter.rb</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6yMqCCzuH"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>LeCun</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Kavukcuoglu</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Farabet</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Convolutional networks and applications in vision</article-title>
        <year>2010</year>  
        <conf-name>Circuits Syst (ISCAS), Proc IEEE Int Symp</conf-name>
        <conf-date>2010</conf-date>
        <conf-loc>Paris</conf-loc>
        <fpage>253</fpage>  
        <lpage>256</lpage> </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Convolutional neural networks for sentence classification</article-title>
        <source>arXiv Prepr arXiv:1408.5882</source>  
        <year>2014</year>  
        <fpage>1</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/abs/1408.5882">https://arxiv.org/abs/1408.5882</ext-link>
        </comment></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>dos Santos</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Gatti</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Deep convolutional neural networks for sentiment analysis of short texts</article-title>
        <source>Proc Coling Tech Papers</source>  
        <year>2014</year>  
        <fpage>69</fpage>  
        <lpage>78</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/C14-1008"/>
        </comment></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nakov</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Ritter</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Rosenthal</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Sebastiani</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Stoyanov</surname>
            <given-names>V</given-names>
          </name>
        </person-group>
        <article-title>SemEval-2016 task 4: Sentiment analysis in Twitter</article-title>
        <year>2016</year>  
        <conf-name>Proc SemEval</conf-name>
        <conf-date>2016</conf-date>
        <conf-loc>San Diego</conf-loc>
        <fpage>1</fpage>  
        <lpage>18</lpage> </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Conneau</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Schwenk</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>LeCun</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Barrault</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Very deep convolutional networks for text classification</article-title>
        <source>arXiv Prepr arXiv:1606.01781</source>
        <year>2016</year>  
        <fpage>1</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/abs/1606.01781"/>
        </comment></nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Limsopatham</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Collier</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Modelling the combination of generic and target domain embeddings in a convolutional neural network for sentence classification</article-title>
        <source>Assoc Computational Linguistics</source>  
        <year>2016</year>  
        <fpage>136</fpage>  
        <lpage>140</lpage>  
        <pub-id pub-id-type="doi">10.17863/CAM.4667</pub-id></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Abadi</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Agarwal</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Barham</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Brevdo</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Citro</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Corrado</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Davis</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Dean</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Devin</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Tensorflow: large-scale machine learning on heterogeneous distributed systems</article-title>
        <source>arXiv Prepr arXiv:1603.04467</source>
        <year>2016</year>  
        <fpage>1</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://arxiv.org/abs/1603.04467"/>
        </comment></nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pennington</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Socher</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Manning</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>GloVe: global vectors for word representation</article-title>
        <year>2014</year>  
        <conf-name>Proceedings of the conference on empirical methods in natural language processing</conf-name>
        <conf-date>2014</conf-date>
        <conf-loc>Doha</conf-loc>
        <fpage>1532</fpage>  
        <lpage>1543</lpage> </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Astudillo</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Amir</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ling</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Martins</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Silva</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Trancoso</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Redol</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Inesc-id: a regression model for large scale twitter sentiment lexicon induction</article-title>
        <year>2015</year>  
        <conf-name>Proc 9th Int Work Semant Eval (SemEval)</conf-name>
        <conf-date>2015</conf-date>
        <conf-loc>Lisbon</conf-loc>
        <fpage>613</fpage>  
        <lpage>618</lpage> </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yamada</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Takeda</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Takefuji</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Enhancing named entity recognition in twitter messages using entity linking</article-title>
        <year>2015</year>  
        <conf-name>Proceedings of the ACL 2015 Workshop on Noisy User-generated Text</conf-name>
        <conf-date>2015</conf-date>
        <conf-loc>Beijing</conf-loc>
        <fpage>136</fpage>  
        <lpage>140</lpage> </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Peterson</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>K-nearest neighbor</article-title>
        <source>Scholarpedia</source>  
        <year>2009</year>  
        <volume>4</volume>  
        <issue>2</issue>  
        <fpage>1883</fpage>  
        <pub-id pub-id-type="doi">10.4249/scholarpedia.1883</pub-id></nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Murphy</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <source>Naive Bayes Classifiers [dissertation]</source>  
        <year>2006</year>  
        <publisher-loc>Vancouver</publisher-loc>
        <publisher-name>University of British Columbia</publisher-name></nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hearst</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Dumais</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Osuna</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Platt</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Scholkopf</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Support vector machines</article-title>
        <source>IEEE Intell Syst Their Appl</source>
        <year>1998</year>  
        <volume>13</volume>  
        <issue>4</issue>  
        <fpage>18</fpage>
        <lpage>28</lpage>
        <pub-id pub-id-type="doi">10.1109/5254.708428</pub-id></nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liaw</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Wiener</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Classification and regression by randomForest</article-title>
        <source>R News</source>  
        <year>2002</year>  
        <volume>2</volume>  
        <issue>3</issue>  
        <fpage>18</fpage>  
        <lpage>22</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://cran.r-project.org/doc/Rnews/Rnews_2002-3.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hall</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Frank</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Holmes</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Pfahringer</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Reutemann</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Witten</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <article-title>The WEKA data mining software: an update</article-title>
        <source>ACM SIGKDD Explor Newsl</source>  
        <year>2009</year>  
        <volume>11</volume>  
        <issue>1</issue>  
        <fpage>10</fpage>  
        <lpage>18</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.kdd.org/exploration_files/p2V11n1.pdf"/>
        </comment></nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Dernoncourt</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Uzuner</surname>
            <given-names>O</given-names>
          </name>
          <name name-style="western">
            <surname>Szolovits</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>De-identification of patient notes with recurrent neural networks</article-title>
        <source>J Am Med Informatics Assoc</source>  
        <year>2016</year>  
        <fpage>156</fpage>
        <pub-id pub-id-type="doi">10.1093/jamia/ocw156</pub-id></nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Bidirectional long short-term memory networks for relation classification</article-title>
        <year>2015</year>  
        <conf-name>Proc 29th Pacific Asia Conf Lang Inf Comput</conf-name>
        <conf-date>2015</conf-date>
        <conf-loc>Shanghai</conf-loc>
        <fpage>73</fpage>  
        <lpage>78</lpage> </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhou</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Shi</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Tian</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Qi</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Hao</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Attention-based bidirectional long short-term memory networks for relation classification</article-title>
        <year>2016</year>  
        <conf-name>Proc 54th Annu Meet Assoc Comput Linguist (Volume 2)</conf-name>
        <conf-date>2016</conf-date>
        <conf-loc>Berlin</conf-loc>
        <fpage>207</fpage>  
        <lpage>212</lpage>  
        <pub-id pub-id-type="doi">10.18653/v1/P16-2034</pub-id></nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Johnson</surname>
            <given-names>TR</given-names>
          </name>
          <name name-style="western">
            <surname>Kavuluru</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Phrase based topic modeling for semantic information processing in biomedicine</article-title>
        <source>Proc Int Conf Mach Learn Appl</source>  
        <year>2013</year>  
        <month>12</month>  
        <fpage>440</fpage>  
        <lpage>445</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28736774"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1109/ICMLA.2013.89</pub-id>
        <pub-id pub-id-type="medline">28736774</pub-id>
        <pub-id pub-id-type="pmcid">PMC5521983</pub-id></nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bian</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Yoshigoe</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Hicks</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Yuan</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>He</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Xie</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Prosperi</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Salloum</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Modave</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Mining Twitter to assess the public perception of the Internet of Things</article-title>
        <source>PLoS One</source>  
        <year>2016</year>  
        <volume>11</volume>  
        <issue>7</issue>  
        <fpage>e0158450</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0158450"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0158450</pub-id>
        <pub-id pub-id-type="medline">27391760</pub-id>
        <pub-id pub-id-type="pii">PONE-D-15-54733</pub-id>
        <pub-id pub-id-type="pmcid">PMC4938510</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
