<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i5e19421</article-id>
      <article-id pub-id-type="pmid">32452804</article-id>
      <article-id pub-id-type="doi">10.2196/19421</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using Reports of Symptoms and Diagnoses on Social Media to Predict COVID-19 Case Counts in Mainland China: Observational Infoveillance Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Liang</surname>
            <given-names>Hai</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Fu</surname>
            <given-names>King-wa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lau</surname>
            <given-names>Eric</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Basch</surname>
            <given-names>Corey</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>Cuihua</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1645-8211</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Anfan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7406-0415</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Luo</surname>
            <given-names>Chen</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9736-0533</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Jingwen</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1733-6857</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Feng</surname>
            <given-names>Bo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7045-6455</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Liao</surname>
            <given-names>Wang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Communication</institution>
            <institution>University of California, Davis</institution>
            <addr-line>One Shields Ave</addr-line>
            <addr-line>Davis, CA</addr-line>
            <country>United States</country>
            <phone>1 5307520966</phone>
            <email>wngliao@ucdavis.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6192-0837</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Communication</institution>
        <institution>University of California, Davis</institution>
        <addr-line>Davis, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Science Communication and Science Policy</institution>
        <institution>University of Science and Technology of China</institution>
        <addr-line>Hefei</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Journalism and Communication</institution>
        <institution>Tsinghua University</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Public Health Sciences</institution>
        <institution>University of California, Davis</institution>
        <addr-line>Davis, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Wang Liao <email>wngliao@ucdavis.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>28</day>
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>5</issue>
      <elocation-id>e19421</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>4</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>18</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>25</day>
          <month>5</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Cuihua Shen, Anfan Chen, Chen Luo, Jingwen Zhang, Bo Feng, Wang Liao. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 28.05.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/5/e19421/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Coronavirus disease (COVID-19) has affected more than 200 countries and territories worldwide. This disease poses an extraordinary challenge for public health systems because screening and surveillance capacity is often severely limited, especially during the beginning of the outbreak; this can fuel the outbreak, as many patients can unknowingly infect other people.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to collect and analyze posts related to COVID-19 on Weibo, a popular Twitter-like social media site in China. To our knowledge, this infoveillance study employs the largest, most comprehensive, and most fine-grained social media data to date to predict COVID-19 case counts in mainland China.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We built a Weibo user pool of 250 million people, approximately half the entire monthly active Weibo user population. Using a comprehensive list of 167 keywords, we retrieved and analyzed around 15 million COVID-19–related posts from our user pool from November 1, 2019 to March 31, 2020. We developed a machine learning classifier to identify “sick posts,” in which users report their own or other people’s symptoms and diagnoses related to COVID-19. Using officially reported case counts as the outcome, we then estimated the Granger causality of sick posts and other COVID-19 posts on daily case counts. For a subset of geotagged posts (3.10% of all retrieved posts), we also ran separate predictive models for Hubei province, the epicenter of the initial outbreak, and the rest of mainland China.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found that reports of symptoms and diagnosis of COVID-19 significantly predicted daily case counts up to 14 days ahead of official statistics, whereas other COVID-19 posts did not have similar predictive power. For the subset of geotagged posts, we found that the predictive pattern held true for both Hubei province and the rest of mainland China regardless of the unequal distribution of health care resources and the outbreak timeline.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Public social media data can be usefully harnessed to predict infection cases and inform timely responses. Researchers and disease control agencies should pay close attention to the social media infosphere regarding COVID-19. In addition to monitoring overall search and posting activities, leveraging machine learning approaches and theoretical understanding of information sharing behaviors is a promising approach to identify true disease signals and improve the effectiveness of infoveillance.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>SARS-CoV-2</kwd>
        <kwd>novel coronavirus</kwd>
        <kwd>infectious disease</kwd>
        <kwd>social media</kwd>
        <kwd>Weibo</kwd>
        <kwd>China</kwd>
        <kwd>disease surveillance</kwd>
        <kwd>surveillance</kwd>
        <kwd>infoveillance</kwd>
        <kwd>infodemiology</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Since the outbreak of coronavirus disease (COVID-19) in December 2019 in Wuhan, Hubei Province, China [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>], the novel coronavirus has affected more than 200 countries and territories worldwide. As of May 16, 2020, there were more than 4 million confirmed cases of COVID-19 and over 300,000 deaths [<xref ref-type="bibr" rid="ref3">3</xref>]. Amid many unknown factors, severe lack of laboratory testing capacity, delays in case reports, variations in local COVID-19 responses, and uncoordinated communication pose tremendous challenges for monitoring the dynamics of the epidemic and developing policies and targeted interventions for resource allocation. </p>
      <p>When conventional disease surveillance capacity is limited, publicly available social media and internet data can play a crucial role in uncovering the hidden dynamics of an emerging outbreak [<xref ref-type="bibr" rid="ref4">4</xref>]. Research in digital disease surveillance, also referred to as infoveillance or infodemiology, has shown great promise in the useful employment of internet data to track the real-time development of public attention, sentiment, and health [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. Specifically, data based on internet searches and social media activities can nowcast and forecast disease prevalence as a supplement to conventional surveillance methods for various infectious diseases [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      <p>One of the best-known examples of digital disease surveillance is Google Flu Trends, which used real-time Google search terms to predict clinical incidence rates of influenza with great initial success [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Data from social media platforms such as Twitter have also been shown to be effective in predicting and tracking various epidemics, such as influenza [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref12">12</xref>] and Zika virus [<xref ref-type="bibr" rid="ref15">15</xref>], with varying degrees of success. However, digital surveillance data present unique challenges. For example, after its release in 2008, Google Flu Trends became less accurate over time, consistently overestimating flu prevalence during 2011-2013. The prediction error was partially attributed to people’s changing search behaviors as well as increased public attention to the epidemic itself, which fueled awareness-related search queries that were not strongly related to disease incidence [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Compared to aggregated search queries, user-generated social media data have the advantage of being more direct and granular, allowing researchers to mine specific content to reflect actual illness. However, media attention to emerging diseases can fuel social media activities, resulting in a deluge of discussions that dilute true disease signals of actual infection cases; thus, predictions are less accurate [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
      <p>The unprecedented magnitude and transmission speed of COVID-19 brought about massive social media activities as people isolated themselves in their homes to break the infection chain [<xref ref-type="bibr" rid="ref17">17</xref>]. Massive social media data inevitably contain massive noise (eg, public reactions and awareness of the disease), which can be counterproductive for disease forecasting. A few early infoveillance studies tracked public discussion of COVID-19 and patient characteristics on Weibo, the most popular public social media site in China [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. Two studies suggested that COVID-19–related Weibo posts and search queries can be used to predict disease prevalence [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. However, these studies relied upon coarse-grained social media data and query data based on a few keywords with a short time window at the onset of the outbreak [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. As such, the predictive accuracy and result interpretability of these studies are limited by the same pitfalls of infoveillance studies mentioned above. There are many reasons to search for and discuss COVID-19 on social media, especially because the disease has received substantial media coverage and many countries are under mandatory lockdown. To more accurately predict infection cases and inform a rapid response, it is therefore critical to use granular and specific social media data to identify reliable disease signals (ie, “sick posts” reporting symptoms and diagnosis).</p>
      <p>Here, we present an infoveillance effort to collect and analyze COVID-19–related posts on Weibo and to identify specific types of Weibo posts that can predict COVID-19 case counts in mainland China. To our knowledge, this study involves the largest, most comprehensive, and most granular collection of social media data related to COVID-19 in the Chinese language, far exceeding the scale, granularity, and timespan of similar studies [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. We built a Weibo user pool of 250 million people, approximately half the active Weibo user population [<xref ref-type="bibr" rid="ref23">23</xref>]. Using a comprehensive list of 167 keywords associated with COVID-19, we retrieved around 15 million social media posts from November 1, 2019 to March 31, 2020. With greatly increased data granularity, we developed a supervised machine learning classifier to distinguish “sick posts,” which are reports of one’s own and other people’s symptoms or diagnosis, from other COVID-19 related posts that could dilute disease signals from the data stream. Using the officially reported case counts as the outcome, we compared the predictive power of sick posts versus other COVID-19 posts. We show evidence that sick posts predicted the daily cases reported by the Chinese Center for Disease Control and Prevention (China CDC) up to 14 days in advance, while other COVID-19–related posts had much weaker predictive power. For the subset of geotagged posts, we found that the predictive pattern held true for both Hubei province and the rest of mainland China. Our work demonstrates a viable method to identify disease signals through reports of symptoms or diagnosis rather than relying upon general discussion of COVID-19, making a significant contribution to the infoveillance literature. </p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection</title>
        <p>The social media data used in this study were collected from a popular Chinese microblog platform, Weibo, which had over 516 million monthly active users at the end of 2019 [<xref ref-type="bibr" rid="ref23">23</xref>]. Weibo is very similar to Twitter, access to which is blocked in mainland China. Unlike Twitter, Weibo does not provide large-scale public application programming interface (API) access to its database. Weibo enables keyword-based advanced searching of posts via its web interface; however, per Weibo policy, the output of these searches is limited to 50 pages (or around 1000 posts). Therefore, large-scale public data access is notoriously difficult.</p>
        <p>To bypass these limitations, we employed a Weibo user pool originally built in 2018, which started from 5 million active Weibo users obtained in our previous research unrelated to COVID-19 [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. We then retrieved the initial 5 million users’ followers and followees (second degree users), the followers and followees of the second degree users (third degree users), etc., until no new users were found. This snowball process resulted in a pool of 250 million users (with bots filtered out), which represents approximately 48.4% of all monthly active Weibo users in 2019 [<xref ref-type="bibr" rid="ref23">23</xref>] and is similar to the 2018 population of Weibo users in terms of self-reported sex and age distribution [<xref ref-type="bibr" rid="ref26">26</xref>] (see <xref rid="figure1" ref-type="fig">Figure 1</xref>). </p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Demographic composition of our Weibo user pool compared to that in the 2018 Annual Sina Weibo user report. Age is reported in years.</p>
          </caption>
          <graphic xlink:href="jmir_v22i5e19421_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>COVID-19 Posts</title>
        <p>Following best practices for content retrieval and analysis [<xref ref-type="bibr" rid="ref27">27</xref>], we generated a comprehensive list of keywords related to COVID-19 through close observation of Weibo posts every day from late January to March 2020. We then retrieved COVID-19 posts by searching all posts by users in the user pool with 167 keywords covering general terms related to the epidemic, such as coronavirus and pneumonia, as well as specific locations (eg, “Wuhan”), drugs (eg, “remdesivir”), and preventive measures (eg, “mask”). For a complete keyword list, see Table A in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>After removing duplicates (ie, reposts of original posts), we retained 14,983,647 posts sent between November 1, 2019 (ie, 30 days before the first confirmed cases) and March 31, 2020 (to access the Weibo dataset on COVID-19, see [<xref ref-type="bibr" rid="ref28">28</xref>]).</p>
        <p>A subset of these posts (464,111/14,983,647, 3.10%) was tagged with geographic information. Among these geotagged posts, we distinguished between posts sent within Hubei province (ie, the epicenter; 169,340/464,111, 36.49%) and those from elsewhere in mainland China (294,771/464,111, 63.51%).</p>
      </sec>
      <sec>
        <title>Sick Posts</title>
        <p>We conceptually defined “sick posts” as posts that report any symptoms or diagnoses that are likely related to COVID-19 based on published research and news reports from the medical social media site DXY.cn [<xref ref-type="bibr" rid="ref29">29</xref>]. We collected a broad list of symptoms, including common symptoms such as cough and shortness of breath and uncommon symptoms such as diarrhea. Sick posts can be further categorized into “ingroup sick posts,” which we defined as posts that disclose the user’s own or immediate family members’ symptoms or diagnoses, and “outgroup sick posts,” which report symptoms and diagnoses of people not in the user’s immediate family. The reason for the a priori categorization is that people tend to have firsthand and more accurate information about their own or immediate family members’ medical conditions; meanwhile, they have much less reliable information about people outside of their household, especially during a national lockdown. All posts that were obtained using the 167 keywords but did not fall into these categories were classified as “other COVID-19 posts.” We provide an example of an ingroup sick post below (translated and edited for brevity): </p>
        <disp-quote>
          <p>During the SARS epidemic in 2003, I got pneumonia with symptoms of fever and cough, was suspected of being infected with SARS, and ended up being hospitalized for more than a month. Now we got COVID-19 in 2020 and I started coughing again, which has lasted for more than a month. What a mess &#60;Face Palm&#62; (Posted 10:23 PM, January 29, 2020)</p>
        </disp-quote>
        <p>We also provide an example of an outgroup sick post:</p>
        <disp-quote>
          <p>One man in another village drank too much. He said he felt sick and had cold symptoms. His brother measured his temperature which turned out to be 38 Celsius. His brother called 120 and sent him to hospital. The whole village was shocked and everyone was afraid to go outside. (Posted 10:14 PM, January 29, 2020)</p>
        </disp-quote>
        <p>We used supervised machine learning algorithms to identify sick posts from the keyword-retrieved COVID-19 posts. We first sampled 11,575 posts in proportion to the retrieved posts across 5 months of data collection. Next, 11 human judges annotated whether a post was an ingroup sick post, outgroup sick post, or other COVID-19 post. The judges independently annotated a subset of 138 posts and achieved high agreement (Krippendorff α=.945) before they divided and annotated the remaining posts. Then, the annotated posts were used to train machine learning models with various algorithms. Based on the classification performance (see <xref ref-type="table" rid="table1">Table 1</xref>), we selected the model using the random forest algorithm (F1 score=0.880). The model classified the 14,983,647 COVID-19 posts into 394,658 (2.63%) ingroup sick posts, 97,635 (0.65%) outgroup sick posts, and 14,491,354 (96.71%) other COVID-19 posts. Because of the low number of outgroup sick posts, we combined ingroup and outgroup sick posts in subsequent analyses. </p>
        <p>Among the subset of geotagged COVID-19 posts (464,111/14,983,647, 3.10% of all retrieved posts), 5,650 sick posts (1.2%) and 163,690 other COVID-19 posts (35.3%) were tagged in Hubei; meanwhile, 26,488 sick posts (5.7%) and 268,283 other COVID-19 posts (57.8%) were from elsewhere in mainland China. These post counts were then aggregated by days. To control for the day-to-day fluctuations of Weibo posts, we further normalized these numbers against the daily counts of all Weibo posts generated by our user pool. The normalized sick post and other COVID-19 post counts can be interpreted as counts per 1 million posts. <xref rid="figure2" ref-type="fig">Figure 2</xref> summarizes our data collection and classification process.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Performance of machine learning models in classifying sick posts.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="374"/>
            <col width="157"/>
            <col width="157"/>
            <col width="157"/>
            <col width="155"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>F1 score</td>
                <td>Precision</td>
                <td>Accuracy</td>
                <td>Recall</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Decision tree</td>
                <td>0.835</td>
                <td>0.840</td>
                <td>0.830</td>
                <td>0.830</td>
              </tr>
              <tr valign="top">
                <td>Extra tree</td>
                <td>0.785</td>
                <td>0.785</td>
                <td>0.785</td>
                <td>0.785</td>
              </tr>
              <tr valign="top">
                <td>Extra trees</td>
                <td>0.878</td>
                <td>0.881</td>
                <td>0.885</td>
                <td>0.885</td>
              </tr>
              <tr valign="top">
                <td>K nearest neighbors</td>
                <td>0.810</td>
                <td>0.819</td>
                <td>0.819</td>
                <td>0.819</td>
              </tr>
              <tr valign="top">
                <td>Multilayer perceptron</td>
                <td>0.847</td>
                <td>0.845</td>
                <td>0.851</td>
                <td>0.851</td>
              </tr>
              <tr valign="top">
                <td>Support vector machine</td>
                <td>0.877</td>
                <td>0.877</td>
                <td>0.878</td>
                <td>0.878</td>
              </tr>
              <tr valign="top">
                <td>Random forest</td>
                <td>0.880</td>
                <td>0.885</td>
                <td>0.888</td>
                <td>0.888</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Weibo data collection and classification procedure.</p>
          </caption>
          <graphic xlink:href="jmir_v22i5e19421_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>COVID-19 Daily Case Counts</title>
        <p>We collected the daily new case counts in mainland China from China CDC on May 8, 2020. China CDC’s official website started collating data on January 16, 2020. Earlier counts were obtained from Huang et al [<xref ref-type="bibr" rid="ref1">1</xref>] and validated against relevant briefings from the National Health Commission. The final case data cover the same period from November 1, 2019, to March 31, 2020, within which the first reported COVID-19 clinical case dates back to December 1, 2019. We also distinguished between cases within and outside Hubei (see <xref rid="figure3" ref-type="fig">Figure 3</xref>).</p>
        <p>It is noteworthy that China CDC released seven editions of diagnostic criteria throughout the time period covered in this study and thus introduced systematic changes to the case counts. Particularly, on February 12, 2020, Hubei province started to implement the fifth edition of the COVID-19 diagnostic criteria released on February 4, 2020. This led to a temporary surge of new cases [<xref ref-type="bibr" rid="ref30">30</xref>]. The impact of this incident was controlled for in our analyses, as discussed in the section below. After close comparison of each edition, we concluded that the changes among other editions of the diagnostic criteria were relatively minor, and their release dates did not appear to be associated with abrupt changes in the case counts; therefore, we did not further control for them.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Daily Weibo posts and confirmed COVID-19 cases between November 1, 2019 and March 31, 2020.</p>
          </caption>
          <graphic xlink:href="jmir_v22i5e19421_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>We performed Granger causality tests [<xref ref-type="bibr" rid="ref31">31</xref>] to discover if an increase of sick posts forecasted an increase of new cases, as formulated in the following linear model:</p>
        <p>
          <graphic xlink:href="jmir_v22i5e19421_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </p>
        <p>where <italic>C<sub>t</sub></italic> is the difference in new case counts at day <italic>t</italic> from day <italic>t –</italic>1, <italic>S<sub>t-i</sub></italic> is the difference in sick post counts (normalized) at day <italic>t – i</italic> from day <italic>t – i –</italic>1, and <italic>I<sub>t</sub></italic> is a time-varying binary variable that equals 1 on February 12, 2020, the day on which Hubei adopted the fifth edition of the diagnostic criteria. This binary variable controls for the exogenous pulse of case counts [<xref ref-type="bibr" rid="ref32">32</xref>]. Since we collected Weibo posts from as early as November 1, 2019, 30 days before the first reported case of COVID-19 on December 1, 2019, we were able to test up to 29 lags of such posts (ie, <italic>m</italic> ≤ 29). The model is further explained as follows.</p>
        <p>First, difference scores instead of raw new case counts were used because Dickey-Fuller tests for the raw counts could not reject nonstationarity (ie, the presence of a unit root) for lags 3–29 at a 5% significance level (see Table B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Both stationarity and the inclusion of autoregressive terms are required by Granger causality. In contrast, the Dickey-Fuller tests suggested that the difference scores of the case counts were stationary: nonstationarity was rejected for lags 1–12 at a 1% significance level and for lags 13–29 at a 5% significance level (see Table B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The Dickey-Fuller tests reached the same conclusion for the stationarities of the sick post counts and their difference scores (see Table B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). We thus also used the difference scores instead of the raw counts to reduce correlations among lag terms of sick post counts. This more clearly identifies their independent effects on case counts. In short, these difference scores can be interpreted as “daily-additional” cases or Weibo posts in addition to the counts from the previous day.</p>
        <p>Second, to determine the number of lag terms to include (ie, <italic>m</italic> in the above formula), we compared model fit statistics while iteratively adding lag terms. The model comparison suggested that the inclusion of more lags continuously improved the model fit up to the maximum lags (ie, 29; see Table C in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). However, the parameter estimates did not change qualitatively after including more than 20 lags (see Tables D and E in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). For parsimony and statistical power, we settled at 20 lags for the following analyses. </p>
        <p>Finally, we included a binary variable to control for the change in the diagnostic criteria of COVID-19 on February 12, 2020, following the procedure of intervention analysis [<xref ref-type="bibr" rid="ref33">33</xref>]. Because this change is unlikely to induce permanent changes to case counts, an instant pulse function was applied at the date of the change. We also tested models that allowed the effect to linearly decay in 2, 3, 4, or 5 days; these models fitted the data more poorly than the model with an instant pulse (see Table F in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>Ordinary least squares regression with robust standard errors was used to estimate the final models. With 20 lag terms in the model, the modeled data include daily-additional new COVID-19 cases from December 1, 2019 to March 31, 2020 and daily-additional counts of sick posts and other COVID-19 posts from November 10, 2019 to March 11, 2020 (<italic>N</italic>=122). </p>
      <p><xref rid="figure4" ref-type="fig">Figure 4</xref>A summarizes the estimates of Granger causality for sick posts predicting new COVID-19 cases with standardized regression coefficients (see Table G in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for all estimated parameters). Particularly, one standard deviation of increase in the daily-additional sick posts (1 sick post per 1 million posts) predicted a 0.133 (95% CI 0.065-0.201) to 0.275 (95% CI 0.134-0.416) standard deviation of the increase in the daily-additional new cases 1-14 days in advance. After including the 20 lags of sick posts, the adjusted <italic>R<sup>2</sup></italic> value of the model increased by 0.128, suggesting that sick posts could explain an additional 12.8% of the variance of daily-additional new cases beyond the autoregressive terms and intervention effects.</p>
      <p>Furthermore, we estimated the relationship between other COVID-19 post counts and daily-additional new cases using the same linear model. <xref rid="figure4" ref-type="fig">Figure 4</xref>A further illustrates the standardized estimates. Compared with sick posts, other COVID-19 posts were weaker signals of future case counts, as demonstrated by their smaller standardized regression coefficients. This indicates that Weibo posts that discussed some aspect of COVID-19 but did not explicitly report a person’s symptoms or diagnosis had lower forecasting power than sick posts.</p>
      <p>To corroborate the above results, we tested the Granger causality of sick posts on cases within Hubei and outside Hubei (see Table H in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Within Hubei, the results generally agreed with the national pattern mentioned above. Daily-additional sick posts predicted daily-additional new cases in Hubei up to 19 days in advance, as illustrated in <xref rid="figure4" ref-type="fig">Figure 4</xref>B. In contrast, other COVID-19 posts had fewer lag terms that could forecast new cases. Outside Hubei, the predictive pattern of sick posts was similar to the national pattern despite a limited time range: sick posts could forecast new cases 2 to 8 days in advance (see <xref rid="figure4" ref-type="fig">Figure 4</xref>C).</p>
      <fig id="figure4" position="float">
        <label>Figure 4</label>
        <caption>
          <p>Standardized estimates of Granger causality for time-lagged, daily-additional Weibo posts (sick posts and other COVID-19 posts) predicting daily-additional cases.</p>
        </caption>
        <graphic xlink:href="jmir_v22i5e19421_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The novel coronavirus causing COVID-19 is a new pathogen in the human reservoir. It poses an extraordinary challenge for public health systems worldwide because screening and diagnostic tests must be developed from scratch. Even when such tests eventually become available, testing capacity is often severely limited; this can fuel the outbreak, as many patients can unknowingly infect other people. Based on approximately 15 million COVID-19-related Weibo posts between November 1, 2019 and March 31, 2020, we developed a supervised machine learning classifier to identify “sick posts,” in which a user reports their own or other people’s symptoms and diagnosis of COVID-19. Using the officially reported daily case counts as the outcome, our work shows that sick posts significantly predict daily cases up to 14 days ahead of official statistics. This finding confirms prior research that social media data can be usefully applied to nowcasting and forecasting emerging infectious diseases such as COVID-19 [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. </p>
        <p>One of the greatest challenges of digital disease surveillance is identifying true disease signals, especially when facing the deluge of social media activity that resulted from COVID-19 mitigation measures [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. Our finding that sick posts have greater predictive power than other COVID-19 posts shows that not all social media data are equally informative. Specifically, COVID-19 has dramatically disrupted everyday life; due to the pandemic, people are sheltering in place and increasingly communicating with others via social media. As shown in prior work [<xref ref-type="bibr" rid="ref18">18</xref>] as well as in our data set, the majority of COVID-19–related chatter on Weibo reflected public awareness of COVID-19 rather than actual symptom reports. Most previous studies took rather coarse-grained approaches, relying primarily on either aggregated search query data or social media data retrieved from limited keyword searches [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. In our work, we gathered the largest, most comprehensive, and most granular collection of social media data related to COVID-19 in the Chinese language. More importantly, we demonstrate a viable method to separate valid signals from noise using reports of symptoms and diagnosis, which makes a significant contribution to the literature on digital surveillance. </p>
        <p>Another important finding is that while the predictive power of sick posts on daily case counts holds true for both Hubei and non-Hubei regions, the effect sizes vary. Being the epicenter of the outbreak, Hubei province experienced extreme testing shortages during the early stage of the study period. As a result, many Hubei residents turned to social media sites such as Weibo to seek help for testing and medical care. In contrast, social media help-seeking activities were uncommon in other parts of China, where testing and health care resources were much more adequate. Taking these regional variations into account, we still observed predictive signals of sick posts on case counts, suggesting that the predictive power of sick posts was robust against testing delays. Further, the variations in the effect estimates show that the predictive power of social media data may vary across different geographic areas, with different levels of preparedness, and at different stages of the outbreak. Future studies based on longer periods of data monitoring could explore the temporal and spatial variations of COVID-19 social media surveillance efficacy in more depth.</p>
        <p>Our work has broad public health implications. The high speed and low cost of social media surveillance can be especially useful in the early stages of the COVID-19 outbreak to inform containment and mitigation efforts when they are most cost-effective. For countries and regions where public health infrastructures do not allow for widespread screening and diagnostic tests, social media disease surveillance provides much-needed information for public health agencies to model the trajectories of the outbreak and to make swift decisions about allocation of resources such as hospital beds, ventilators, and personal protective equipment. </p>
        <p>Another advantage of social media surveillance is that it can be performed from a distance. As COVID-19 continues to spread worldwide, countries lacking testing and screening infrastructures will become “dark spots,” endangering their own citizens as well as the entire world. It is imperative that international organizations such as the World Health Organization integrate such data into their outbreak forecasting management practices to mobilize and coordinate relief efforts to help combat COVID-19. </p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has several limitations. First, Weibo posts were retrieved retrospectively rather than in real time; therefore, deleted or censored posts were absent from our data set. However, we have no reason to believe that deletion or censorship favored “sick posts” in measurable ways. In fact, a recent study on Weibo censorship from December 2019 to February 2020 shows that only 1.7 per 1000 Weibo posts were censored; also, these censored posts generally pertained to the missteps in the government’s COVID-19 response, not individual reports of symptoms and diagnoses [<xref ref-type="bibr" rid="ref37">37</xref>]. Therefore, our results should not be affected by censorship. Second, as some studies suggest [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>], confirmed COVID-19 case counts published by China CDC may underestimate the actual counts, due in part to limits in testing capacity and the existence of asymptomatic carriers. Still, the data here represent the best-known data of confirmed case counts, and our models rely on trends and changes in these case counts rather than the actual numbers. Third, it is important to acknowledge that sick posts as disease signals are not without noise because Weibo users who reported COVID-19 symptoms were not necessarily clinically diagnosed with COVID-19; Weibo users may not speak the truth; and Weibo users may “overreport” (posting about their symptoms or diagnoses multiple times) or “underreport” (not posting despite their symptoms or diagnoses) for a variety of reasons. Such inaccuracies are inherent in user-generated social media data and widely exist in all infoveillance studies. However, it should be noted that the goal of infoveillance has never been to achieve one-for-one matching between social media posts and clinical cases. Rather, infoveillance approaches strive to mine useful early signals from social media and internet data as a supplement to conventional surveillance efforts. Despite this noise, we still found that sick posts predicted COVID-19 case counts, indicating the validity of this signal in reflecting disease spread in the population.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The threats of COVID-19 and other infectious diseases are likely to recur in the future. Reports of symptoms and diagnoses on social media during emerging disease outbreaks send invaluable warning signals to the public. Researchers and disease control agencies should pay close attention to the social media infosphere. In addition to monitoring overall search and posting activities, it is crucial to sift through the contents and efficiently separate true signals from noise. Our main findings highlight the importance of using rigorous procedures and understanding information sharing behaviors to obtain quality disease signals. Future studies based on longer periods of data monitoring could explore the temporal and spatial diffusion of COVID-19 in more depth. A more detailed examination of post contents reporting restraints in information or medical resources will be helpful in developing local outbreak responses.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary tables.</p>
        <media xlink:href="jmir_v22i5e19421_app1.docx" xlink:title="DOCX File , 182 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">China CDC</term>
          <def>
            <p>Chinese Center for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">COVID-19</term>
          <def>
            <p>coronavirus disease</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We thank Jingyang Xu, Minwei Ren, Rixia Tang, Zichao Wang, Yongyan Xu, Na Yang, Yalan Jin, Xiuchan Xu, Xinyu Wang, Ruizhi Sun, Wenhui Zhu, Yiwei Li, and Tianyu Zhao for their help with data annotation.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>CS, WL, JZ, and BF contributed to the study design. AC collected the Weibo data. WL, CL, and AC contributed to the data analysis. WL, CS, CL, and AC contributed to the design and drawing of the figures. All authors contributed to the writing of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Clinical features of patients infected with 2019 novel coronavirus in Wuhan, China</article-title>
          <source>Lancet</source>
          <year>2020</year>
          <month>02</month>
          <volume>395</volume>
          <issue>10223</issue>
          <fpage>497</fpage>
          <lpage>506</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(20)30183-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Holmes</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A new coronavirus associated with human respiratory disease in China</article-title>
          <source>Nature</source>
          <year>2020</year>
          <month>02</month>
          <day>03</day>
          <volume>579</volume>
          <issue>7798</issue>
          <fpage>265</fpage>
          <lpage>269</lpage>
          <pub-id pub-id-type="doi">10.1038/s41586-020-2008-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <source>World Health Organization</source>
          <year>2020</year>
          <month>05</month>
          <day>16</day>
          <access-date>2020-05-26</access-date>
          <comment>Coronavirus disease 2019 (COVID-19) Situation Report 117. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/docs/default-source/coronaviruse/situation-reports/20200516-covid-19-sitrep-117.pdf?sfvrsn=8f562cc_2">https://www.who.int/docs/default-source/coronaviruse/situation-reports/20200516-covid-19-sitrep-117.pdf?sfvrsn=8f562cc_2</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Centola</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Social Networks and Health: New Developments in Diffusion, Online and Offline</article-title>
          <source>Annu Rev Sociol</source>
          <year>2019</year>
          <month>07</month>
          <day>30</day>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>91</fpage>
          <lpage>109</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev-soc-073117-041421</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chew</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pandemics in the age of Twitter: content analysis of Tweets during the 2009 H1N1 outbreak</article-title>
          <source>PLoS One</source>
          <year>2010</year>
          <month>11</month>
          <day>29</day>
          <volume>5</volume>
          <issue>11</issue>
          <fpage>e14118</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0014118"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0014118</pub-id>
          <pub-id pub-id-type="medline">21124761</pub-id>
          <pub-id pub-id-type="pmcid">PMC2993925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: framework for an emerging set of public health informatics methods to analyze search, communication and publication behavior on the Internet</article-title>
          <source>J Med Internet Res</source>
          <year>2009</year>
          <month>03</month>
          <day>27</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2009/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1157</pub-id>
          <pub-id pub-id-type="medline">19329408</pub-id>
          <pub-id pub-id-type="pii">v11i1e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC2762766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aiello</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Renson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zivich</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Social Media- and Internet-Based Disease Surveillance for Public Health</article-title>
          <source>Annu Rev Public Health</source>
          <year>2020</year>
          <month>04</month>
          <day>02</day>
          <volume>41</volume>
          <fpage>101</fpage>
          <lpage>118</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-040119-094402</pub-id>
          <pub-id pub-id-type="medline">31905322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barros</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Duggan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rebholz-Schuhmann</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The Application of Internet-Based Sources for Public Health Surveillance (Infoveillance): Systematic Review</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>03</month>
          <day>13</day>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>e13680</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/3/e13680/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13680</pub-id>
          <pub-id pub-id-type="medline">32167477</pub-id>
          <pub-id pub-id-type="pii">v22i3e13680</pub-id>
          <pub-id pub-id-type="pmcid">PMC7101503</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charles-Smith</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Reynolds</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>EHY</given-names>
            </name>
            <name name-style="western">
              <surname>Olsen</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlin</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Shigematsu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Streichert</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Suda</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Corley</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Using Social Media for Actionable Disease Surveillance and Outbreak Management: A Systematic Literature Review</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>10</issue>
          <fpage>e0139701</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0139701"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0139701</pub-id>
          <pub-id pub-id-type="medline">26437454</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-21599</pub-id>
          <pub-id pub-id-type="pmcid">PMC4593536</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Chinese social media analysis for disease surveillance</article-title>
          <source>Pers Ubiquit Comput</source>
          <year>2015</year>
          <month>9</month>
          <day>11</day>
          <volume>19</volume>
          <issue>7</issue>
          <fpage>1125</fpage>
          <lpage>1132</lpage>
          <pub-id pub-id-type="doi">10.1007/s00779-015-0877-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>IC</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ying</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Schaible</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tse</surname>
              <given-names>ZT</given-names>
            </name>
          </person-group>
          <article-title>Chinese social media reaction to the MERS-CoV and avian influenza A(H7N9) outbreaks</article-title>
          <source>Infect Dis Poverty</source>
          <year>2013</year>
          <month>12</month>
          <day>20</day>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://idpjournal.biomedcentral.com/articles/10.1186/2049-9957-2-31"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/2049-9957-2-31</pub-id>
          <pub-id pub-id-type="medline">24359669</pub-id>
          <pub-id pub-id-type="pii">2049-9957-2-31</pub-id>
          <pub-id pub-id-type="pmcid">PMC3878123</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>National and local influenza surveillance through Twitter: an analysis of the 2012-2013 influenza epidemic</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e83672</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0083672"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0083672</pub-id>
          <pub-id pub-id-type="medline">24349542</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-35058</pub-id>
          <pub-id pub-id-type="pmcid">PMC3857320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klembczyk</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jalalpour</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Washington</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Pines</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Rothman</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Dugas</surname>
              <given-names>AF</given-names>
            </name>
          </person-group>
          <article-title>Google Flu Trends Spatial Variability Validated Against Emergency Department Influenza-Related Visits</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <month>06</month>
          <day>28</day>
          <volume>18</volume>
          <issue>6</issue>
          <fpage>e175</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2016/6/e175/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5585</pub-id>
          <pub-id pub-id-type="medline">27354313</pub-id>
          <pub-id pub-id-type="pii">v18i6e175</pub-id>
          <pub-id pub-id-type="pmcid">PMC4942685</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dugas</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Hsieh</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Pines</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Mareiniss</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Mohareb</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gaydos</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Perl</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Rothman</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>Google Flu Trends: correlation with emergency department influenza rates and crowding metrics</article-title>
          <source>Clin Infect Dis</source>
          <year>2012</year>
          <month>02</month>
          <day>15</day>
          <volume>54</volume>
          <issue>4</issue>
          <fpage>463</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22230244"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/cid/cir883</pub-id>
          <pub-id pub-id-type="medline">22230244</pub-id>
          <pub-id pub-id-type="pii">cir883</pub-id>
          <pub-id pub-id-type="pmcid">PMC3404718</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McGough</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hawkins</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Forecasting Zika Incidence in the 2016 Latin America Outbreak Combining Traditional Disease Surveillance with Search, Social Media, and News Report Data</article-title>
          <source>PLoS Negl Trop Dis</source>
          <year>2017</year>
          <month>01</month>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e0005295</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pntd.0005295"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pntd.0005295</pub-id>
          <pub-id pub-id-type="medline">28085877</pub-id>
          <pub-id pub-id-type="pii">PNTD-D-16-01733</pub-id>
          <pub-id pub-id-type="pmcid">PMC5268704</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Big data. The parable of Google Flu: traps in big data analysis</article-title>
          <source>Science</source>
          <year>2014</year>
          <month>03</month>
          <day>14</day>
          <volume>343</volume>
          <issue>6176</issue>
          <fpage>1203</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1126/science.1248506</pub-id>
          <pub-id pub-id-type="medline">24626916</pub-id>
          <pub-id pub-id-type="pii">343/6176/1203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tsoi</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Characterizing the Propagation of Situational Information in Social Media During COVID-19 Epidemic: A Case Study on Weibo</article-title>
          <source>IEEE Trans Comput Soc Syst</source>
          <year>2020</year>
          <month>4</month>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>556</fpage>
          <lpage>562</lpage>
          <pub-id pub-id-type="doi">10.1109/tcss.2020.2980007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Grépin</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>IC</given-names>
            </name>
          </person-group>
          <article-title>Limited Early Warnings and Public Attention to Coronavirus Disease 2019 in China, January-February, 2020: A Longitudinal Cohort of Randomly Sampled Weibo Users</article-title>
          <source>Disaster Med Public Health Prep</source>
          <year>2020</year>
          <month>04</month>
          <day>03</day>
          <fpage>1</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32241328"/>
          </comment>
          <pub-id pub-id-type="doi">10.1017/dmp.2020.68</pub-id>
          <pub-id pub-id-type="medline">32241328</pub-id>
          <pub-id pub-id-type="pii">S1935789320000683</pub-id>
          <pub-id pub-id-type="pmcid">PMC7171227</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Cuomo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Purushothaman</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Data Mining and Content Analysis of the Chinese Social Media Platform Weibo During the Early COVID-19 Outbreak: Retrospective Observational Infoveillance Study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>04</month>
          <day>21</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e18700</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e18700/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18700</pub-id>
          <pub-id pub-id-type="medline">32293582</pub-id>
          <pub-id pub-id-type="pii">v6i2e18700</pub-id>
          <pub-id pub-id-type="pmcid">PMC7175787</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Chinese Public's Attention to the COVID-19 Epidemic on Social Media: Observational Descriptive Study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>05</month>
          <day>04</day>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>e18825</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/5/e18825/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18825</pub-id>
          <pub-id pub-id-type="medline">32314976</pub-id>
          <pub-id pub-id-type="pii">v22i5e18825</pub-id>
          <pub-id pub-id-type="pmcid">PMC7199804</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Mining the Characteristics of COVID-19 Patients in China: Analysis of Social Media Posts</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>05</month>
          <day>17</day>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>e19087</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/5/e19087/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19087</pub-id>
          <pub-id pub-id-type="medline">32401210</pub-id>
          <pub-id pub-id-type="pii">v22i5e19087</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pang</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Retrospective analysis of the possibility of predicting the COVID-19 outbreak from Internet searches and social media data, China, 2020</article-title>
          <source>Euro Surveill</source>
          <year>2020</year>
          <month>03</month>
          <volume>25</volume>
          <issue>10</issue>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.eurosurveillance.org/content/10.2807/1560-7917.ES.2020.25.10.2000199"/>
          </comment>
          <pub-id pub-id-type="doi">10.2807/1560-7917.ES.2020.25.10.2000199</pub-id>
          <pub-id pub-id-type="medline">32183935</pub-id>
          <pub-id pub-id-type="pmcid">PMC7078825</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <source>weibo.com</source>
          <comment>2019 Annual Sina Weibo User Report<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://ir.weibo.com/node/7726/html">http://ir.weibo.com/node/7726/html</ext-link> </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Top-down or Bottom-up? A Network Agenda-setting Study of Chinese Nationalism on Social Media</article-title>
          <source>J Broadcasting Electron Media</source>
          <year>2019</year>
          <month>09</month>
          <day>20</day>
          <volume>63</volume>
          <issue>3</issue>
          <fpage>512</fpage>
          <lpage>533</lpage>
          <pub-id pub-id-type="doi">10.1080/08838151.2019.1653104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The evolution of online discussions about GMOs in China over the past decade: Changes, causes and characteristics</article-title>
          <source>Cultures of Science</source>
          <year>2020</year>
          <month>01</month>
          <day>20</day>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>311</fpage>
          <lpage>325</lpage>
          <pub-id pub-id-type="doi">10.1177/209660831900200406</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <source>weibo.com</source>
          <year>2019</year>
          <comment>2018 Annual Sina Weibo User Report. Webpage in Chinese<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://data.weibo.com/report/reportDetail?id=433">https://data.weibo.com/report/reportDetail?id=433</ext-link> </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lacy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Watson</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Riffe</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lovejoy</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Issues and Best Practices in Content Analysis</article-title>
          <source>Journal Mass Commun Q</source>
          <year>2015</year>
          <month>09</month>
          <day>28</day>
          <volume>92</volume>
          <issue>4</issue>
          <fpage>791</fpage>
          <lpage>811</lpage>
          <pub-id pub-id-type="doi">10.1177/1077699015607338</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>XL</given-names>
            </name>
          </person-group>
          <source>arXiv</source>
          <year>2020</year>
          <month>05</month>
          <day>21</day>
          <access-date>2020-05-26</access-date>
          <comment>Weibo-COV: A Large-Scale COVID-19 Social Media Dataset from Weibo<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2005.09174">https://arxiv.org/abs/2005.09174</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Viboud</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Early epidemiological analysis of the coronavirus disease 2019 outbreak based on crowdsourced data: a population-level observational study</article-title>
          <source>Lancet Digit Health</source>
          <year>2020</year>
          <month>04</month>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>e201</fpage>
          <lpage>e208</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(20)30026-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(20)30026-1</pub-id>
          <pub-id pub-id-type="medline">32309796</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(20)30026-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7158945</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <source>China Center for Disease Control and Prevention</source>
          <year>2020</year>
          <month>02</month>
          <day>12</day>
          <access-date>2020-05-26</access-date>
          <comment>COVID-19 Situation Report on February 12, 2020. Webpage in Chinese<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.chinacdc.cn/jkzt/crb/zl/szkb_11803/jszl_11809/202002/t20200213_212624.html">http://www.chinacdc.cn/jkzt/crb/zl/szkb_11803/jszl_11809/202002/t20200213_212624.html</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Granger</surname>
              <given-names>CWJ</given-names>
            </name>
          </person-group>
          <article-title>Investigating Causal Relations by Econometric Models and Cross-spectral Methods</article-title>
          <source>Econometrica</source>
          <year>1969</year>
          <month>08</month>
          <volume>37</volume>
          <issue>3</issue>
          <fpage>424</fpage>
          <pub-id pub-id-type="doi">10.2307/1912791</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Box</surname>
              <given-names>GEP</given-names>
            </name>
            <name name-style="western">
              <surname>Tiao</surname>
              <given-names>GC</given-names>
            </name>
          </person-group>
          <article-title>Intervention Analysis with Applications to Economic and Environmental Problems</article-title>
          <source>J Am Stat Assoc</source>
          <year>1975</year>
          <month>03</month>
          <volume>70</volume>
          <issue>349</issue>
          <fpage>70</fpage>
          <lpage>79</lpage>
          <pub-id pub-id-type="doi">10.1080/01621459.1975.10480264</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Box-Steffensmeier</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hitt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pevehouse</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Time Series Analysis for the Social Sciences</source>
          <year>2014</year>
          <publisher-loc>Cambridge, UK</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buckee</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Improving epidemic surveillance and response: big data is dead, long live big data</article-title>
          <source>Lancet Digit Health</source>
          <year>2020</year>
          <month>05</month>
          <volume>2</volume>
          <issue>5</issue>
          <fpage>e218</fpage>
          <lpage>e220</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S2589-7500(20)30059-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(20)30059-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Corona Virus (COVID-19) "Infodemic" and Emerging Issues through a Data Lens: The Case of China</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2020</year>
          <month>03</month>
          <day>30</day>
          <volume>17</volume>
          <issue>7</issue>
          <fpage>2309</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph17072309"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph17072309</pub-id>
          <pub-id pub-id-type="medline">32235433</pub-id>
          <pub-id pub-id-type="pii">ijerph17072309</pub-id>
          <pub-id pub-id-type="pmcid">PMC7177854</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Crowdsourcing data to mitigate epidemics</article-title>
          <source>Lancet Digit Health</source>
          <year>2020</year>
          <month>04</month>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>e156</fpage>
          <lpage>e157</lpage>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(20)30055-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Did the world overlook the media’s early warning of COVID-19?</article-title>
          <source>J Risk Res</source>
          <year>2020</year>
          <month>04</month>
          <day>24</day>
          <fpage>1</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1080/13669877.2020.1756380</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kucharski</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Russell</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Diamond</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Edmunds</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Eggo</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jit</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Munday</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gimma</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van Zandvoort</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbs</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hellewell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jarvis</surname>
              <given-names>CI</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Quilty</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bosse</surname>
              <given-names>NI</given-names>
            </name>
            <name name-style="western">
              <surname>Abbott</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Klepac</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Flasche</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Early dynamics of transmission and control of COVID-19: a mathematical modelling study</article-title>
          <source>Lancet Infect Dis</source>
          <year>2020</year>
          <month>05</month>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>553</fpage>
          <lpage>558</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S1473-3099(20)30144-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S1473-3099(20)30144-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dorigatti</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cori</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Donnelly</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ferguson</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>Imperial College</source>
          <year>2020</year>
          <month>01</month>
          <day>22</day>
          <access-date>2020-05-26</access-date>
          <comment>Report 2: Estimating the potential total number of novel Coronavirus (2019-nCoV) cases in Wuhan City, China<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-update-epidemic-size-22-01-2020.pdf">https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-update-epidemic-size-22-01-2020.pdf</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>GM</given-names>
            </name>
          </person-group>
          <article-title>Nowcasting and forecasting the potential domestic and international spread of the 2019-nCoV outbreak originating in Wuhan, China: a modelling study</article-title>
          <source>Lancet</source>
          <year>2020</year>
          <month>02</month>
          <volume>395</volume>
          <issue>10225</issue>
          <fpage>689</fpage>
          <lpage>697</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(20)30260-9</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
