<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i8e20285</article-id>
      <article-id pub-id-type="pmid">32730217</article-id>
      <article-id pub-id-type="doi">10.2196/20285</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Real-Time Forecasting of the COVID-19 Outbreak in Chinese Provinces: Machine Learning Approach Using Novel Digital Data and Estimates From Mechanistic Models</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Giabbanelli</surname>
            <given-names>Philippe</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Fagherazzi</surname>
            <given-names>Guy</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Dianbo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3042-9161</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Clemente</surname>
            <given-names>Leonardo</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8939-8841</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Poirier</surname>
            <given-names>Canelle</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6972-2621</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Ding</surname>
            <given-names>Xiyu</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6852-2962</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Chinazzi</surname>
            <given-names>Matteo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5955-1929</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Davis</surname>
            <given-names>Jessica</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0726-1855</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Vespignani</surname>
            <given-names>Alessandro</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3419-4205</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Santillana</surname>
            <given-names>Mauricio</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Computational Health Informatics Program</institution>
            <institution>Boston Children’s Hospital</institution>
            <addr-line>300 Longwood Avenue</addr-line>
            <addr-line>Landmark 5th Floor East</addr-line>
            <addr-line>Boston, MA, 02215</addr-line>
            <country>United States</country>
            <phone>1 (617) 919 1795</phone>
            <email>msantill@g.harvard.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4206-418X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Computational Health Informatics Program</institution>
        <institution>Boston Children’s Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Pediatrics</institution>
        <institution>Harvard Medical School</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Tecnologico de Monterrey</institution>
        <addr-line>Monterrey</addr-line>
        <country>Mexico</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Harvard TH Chan School of Public Health</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Laboratory for the Modeling of Biological and Socio-technical Systems</institution>
        <institution>Northeastern University</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>ISI Foundation</institution>
        <addr-line>Turin</addr-line>
        <country>Italy</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Mauricio Santillana <email>msantill@g.harvard.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>17</day>
        <month>8</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>8</issue>
      <elocation-id>e20285</elocation-id>
      <history>
        <date date-type="received">
          <day>14</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>24</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>24</day>
          <month>7</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Dianbo Liu, Leonardo Clemente, Canelle Poirier, Xiyu Ding, Matteo Chinazzi, Jessica Davis, Alessandro Vespignani, Mauricio Santillana. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 17.08.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/8/e20285/" xlink:type="simple"/>
      <related-article related-article-type="correction-forward" xlink:title="This is a corrected version. See correction statement in:" xlink:href="https://www.jmir.org/2020/9/e23996/" vol="22" page="e23996"> </related-article>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The inherent difficulty of identifying and monitoring emerging outbreaks caused by novel pathogens can lead to their rapid spread; and if left unchecked, they may become major public health threats to the planet. The ongoing coronavirus disease (COVID-19) outbreak, which has infected over 2,300,000 individuals and caused over 150,000 deaths, is an example of one of these catastrophic events.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We present a timely and novel methodology that combines disease estimates from mechanistic models and digital traces, via interpretable machine learning methodologies, to reliably forecast COVID-19 activity in Chinese provinces in real time.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Our method uses the following as inputs: (a) official health reports, (b) COVID-19–related internet search activity, (c) news media activity, and (d) daily forecasts of COVID-19 activity from a metapopulation mechanistic model. Our machine learning methodology uses a clustering technique that enables the exploitation of geospatial synchronicities of COVID-19 activity across Chinese provinces and a data augmentation technique to deal with the small number of historical disease observations characteristic of emerging outbreaks.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our model is able to produce stable and accurate forecasts 2 days ahead of the current time and outperforms a collection of baseline models in 27 out of 32 Chinese provinces.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our methodology could be easily extended to other geographies currently affected by COVID-19 to aid decision makers with monitoring and possibly prevention.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>coronavirus</kwd>
        <kwd>digital epidemiology</kwd>
        <kwd>modeling</kwd>
        <kwd>modeling disease outbreaks</kwd>
        <kwd>emerging outbreak</kwd>
        <kwd>machine learning</kwd>
        <kwd>precision public health</kwd>
        <kwd>machine learning in public health</kwd>
        <kwd>forecasting</kwd>
        <kwd>digital data</kwd>
        <kwd>mechanistic model</kwd>
        <kwd>hybrid simulation</kwd>
        <kwd>hybrid model</kwd>
        <kwd>simulation</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>First detected in Wuhan, China, in December 2019, severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) infection had rapidly spread by late January 2020 to all Chinese provinces and many other countries [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. On January 30, 2020, the World Health Organization (WHO) declared a Public Health Emergency of International Concern (PHEIC) [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]; and on March 11, 2020, the WHO declared the coronavirus disease (COVID-19) a pandemic [<xref ref-type="bibr" rid="ref5">5</xref>]. By April 18, 2020, the virus had affected more than 2,300,000 people and caused the deaths of 150,000 in more than 180 countries [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p/>
      <p>In the last decade, methods that leverage data from internet-based data sources and data from traditional surveillance systems have emerged as a complementary alternative to provide near real-time disease activity estimates (eg, for influenza and dengue) [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Despite the fact that these methodologies have successfully addressed delays in the availability of health reports as well as case count data quality issues, developing predictive models for an emerging disease outbreak such as COVID-19 is an even more challenging task [<xref ref-type="bibr" rid="ref14">14</xref>]. There are multiple reasons for this; for example, the availability of epidemiological information for this disease is scarce (there is no historical precedent about the behavior of the disease); the daily/weekly epidemiological reports that become available are frequently revised and corrected retrospectively to account for mistakes in data collection and reporting (a common practice in public health reports); and the presence of a diverse array of uncertainties about disease burden due in part to underreporting of cases [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      <p>Most efforts to estimate the time evolution of COVID-19 spread and the effect of public health interventions have relied on mechanistic models that parameterize transmission and epidemiological characteristics to produce forecasts of disease activity [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. In contrast, only a limited number of studies have investigated ways to track COVID-19 activity, leveraging internet search data [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref18">18</xref>], and few to the best of our knowledge have combined internet-based data sources and mechanistic estimates to forecast COVID-19 activity [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
      <p>We present a novel hybrid methodology that combines mechanistic and machine learning methodologies to successfully forecast COVID-19 in real time at the province level in China [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. We used a data-driven approach to incorporate inputs from (a) official health reports from Chinese Center for Disease Control and Prevention (China CDC), (b) COVID-19–related internet search activity from Baidu, (c) news media activity reported by Media Cloud, and (d) daily forecasts of COVID-19 activity from the simulation epidemiological model GLEAM (global epidemic and mobility), a metapopulation mechanistic model [<xref ref-type="bibr" rid="ref16">16</xref>]. Inspired by a methodology previously used to successfully forecast seasonal influenza in the United States at the state level [<xref ref-type="bibr" rid="ref11">11</xref>] and previous methods to monitor emerging outbreaks [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>], our method is capable of reliably forecasting COVID-19 activity even when limited historical disease activity observations are available. From a methodological perspective, the novelty in our approach comes from a clustering technique that enables the exploitation of geospatial synchronicities of COVID-19 activity across Chinese provinces and a data augmentation technique to mitigate the scarcity of historical data for model training.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Experimental Design</title>
        <p>Our method was designed for forecasting COVID-19 2 days ahead of the current time. We used as inputs the following data sources: COVID-19 activity reports from China CDC; internet search frequencies from Baidu; the number of related news reports from 311 media sources, as reported by the Media Cloud platform; and COVID-19 daily forecasts from a metapopulation mechanistic model. Our machine learning methodology also used a clustering and data augmentation technique. We provide details about data sources and statistical methods in the following sections.</p>
      </sec>
      <sec>
        <title>Data Sources</title>
        <sec>
          <title>Daily Reports of COVID-19</title>
          <p>Case counts of COVID-19 were obtained from China CDC. These data are curated and publicly available via the Models of Infectious Disease Agent Study (MIDAS) association [<xref ref-type="bibr" rid="ref24">24</xref>]. All data were collected on the original date they became available. Case counts released by China CDC can be revised up to several weeks later; in this study, we only used unrevised data, which reflects the realistic scenario for producing real-time estimates. The reports, available for all the provinces, include various activity trends such as newly diagnosed cases, new suspected cases, and new reported deaths. For our study, we selected the number of confirmed cases as the epidemiological target and collected activity reports from January 10, 2020, to February 21, 2020.</p>
        </sec>
        <sec>
          <title>Baidu Internet Search Activity: Data Exclusion</title>
          <p>We collected the daily search fraction for three different COVID-19–related search terms in Mandarin (“COVID-19 symptoms” [“新冠肺炎症状”], “how many degree is fever” [“多少度算发烧”], and “symptoms of fever” [“发烧症状”]). These terms were selected based on their correlation and potential association with case counts of COVID-19 [<xref ref-type="bibr" rid="ref25">25</xref>] and collected individually for each province from January 1, 2020, to February 21, 2020. Our decision to use internet activity as a source of information is based on the hypothesis that search frequencies from COVID-19–related keywords reflect, to an extent, the number of people presenting symptoms related to COVID-19 before their arrival at a clinic. Given Baidu imposes limits to data access for researchers, we were unable to conduct a broad analysis on a wide range of keywords. A visualization of the Baidu search term time series can be seen in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Visualization of the evolution of coronavirus disease (COVID-19) cases and Baidu search trends. The evolution of COVID-19 cases is represented in gray and Baidu search trends in green and orange. All time series have been smoothed for visualization purposes.</p>
            </caption>
            <graphic xlink:href="jmir_v22i8e20285_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>News Reports</title>
          <p>An online open-source platform called Media Cloud, which allows the tracking and analysis of media for any topic of interest through the matching of keywords, was used. We obtained volumes of the number of news articles available over time from a collection of 311 Chinese media websites using the keywords “coronavirus,” “COVID-19,” “2019-nCoV,” “pneumonia,” “fever,” “cough,” and the name of each province to generate province-specific news activity trends. Media data from January 1, 2020, to February 21, 2020, were collected and used as an additional source of information.</p>
        </sec>
        <sec>
          <title>Global Epidemic and Mobility Model</title>
          <p>The global epidemic and mobility model, GLEAM, is an individual-based, stochastic, and spatial epidemic model [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>] that has been used to simulate the early stages of the COVID-19 epidemic in mainland China and across the world [<xref ref-type="bibr" rid="ref16">16</xref>]. GLEAM is based on a metapopulation approach in which the world population is divided into subpopulations centered around major transportation hubs (usually airports). Over 3000 subpopulations in about 200 different countries and territories are included in the model. The subpopulations are connected by short-range commuting and long-range travel networks that determine the flow of individuals traveling daily among them. Short-range mobility patterns (eg, daily commuting) are derived from data collected from the National Statistical Offices of 30 countries on five continents [<xref ref-type="bibr" rid="ref26">26</xref>]. In addition, for the COVID-19 epidemic, mobility variations in mainland China are further calibrated using deidentified and aggregated domestic population movement data as derived from Baidu Location-Based Services. The airline transportation data consider daily origin-destination traffic flows obtained from the Official Aviation Guide and the International Air Transport Association databases (updated in 2019), and accounting for travel restrictions in 2020. Within each subpopulation, the human-to-human transmission of COVID-19 is modeled using a compartmental representation of the disease where each individual can occupy one of the following four states: susceptible (S), latent (L), infectious (I), and removed (R). Susceptible individuals can acquire the virus through contacts with individuals in the infectious state, and become latent, meaning they are infected but cannot transmit the infection yet. 
Latent individuals progress to the infectious stage with a rate inversely proportional to the latent period. Infectious individuals progress into the removed stage with a rate inversely proportional to the infectious period. Removed individuals represent those who can no longer infect others, meaning they were isolated, hospitalized, died, or have recovered.</p>
          <p>The model produces an ensemble of possible epidemic scenarios providing epidemic indicators, such as the number of newly generated infections and deaths in each subpopulation. The model is initialized by a starting date of the epidemic between November 15, 2019, and December 1, 2019, with 20 to 40 cases caused by zoonotic exposure [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref32">32</xref>]. The transmission dynamic is calibrated by using an Approximate Bayesian Computation approach to estimate the posterior distribution of the basic reproductive number R<sub>0</sub> that uses as evidence the detection of infections imported from China at international locations across the world [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. A sensitivity analysis has been performed on the initial conditions of the model considering different values for the mean latency period (range 3-6 days), the mean infectious period (range 2-8 days), the generation time (range 6-11 days), and the initial number of zoonotic cases (range 20-80). The calibrated model is then used to generate the out-of-sample ensemble of stochastic epidemic evolutions across mainland China.</p>
        </sec>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <sec>
          <title>Aggregation of Daily Reports</title>
          <p>To enhance signal and reduce noise, we aggregated case count, search volumes, and media article count for each <italic>δt</italic> = 2 days window.</p>
          <p>As COVID-19 is an emerging outbreak, the amount of epidemiological information, either official or unofficial, is low, and thus, limits our capacity to build predictive models. To maximize usage of data, we applied the strategies below.</p>
        </sec>
        <sec>
          <title>Clustering</title>
          <p>We clustered the 32 provinces into several groups and trained a model for each group. Clustering and model retraining processes were repeated on every single new prediction date. To determine the similarities in outbreak patterns across Chinese provinces, we calculated the pairwise correlation matrix for confirmed COVID-19 cases by using all historical data available. Then, based on the similarity matrix, provinces were clustered by using complete linkage hierarchical clustering, which is an agglomerative hierarchical clustering method that creates clusters based on the most dissimilar pairs [<xref ref-type="bibr" rid="ref38">38</xref>]. The number of clusters K was determined by choosing the value of K that maximized the Calinski-Harabasz index [<xref ref-type="bibr" rid="ref39">39</xref>]. Our clustering method gained higher stability when more data points were available for clustering [<xref ref-type="bibr" rid="ref40">40</xref>]. More details of the clustering method are presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref43">43</xref>].</p>
        </sec>
        <sec>
          <title>Data Augmentation</title>
          <p>We conducted data augmentation by using a bootstrap method to resample each data point of the training data set. We made 100 bootstrap samples for each data point to which we added random Gaussian noise with a mean of 0 (SD 0.01). Due to the stochasticity of both the clustering algorithm and the model training process, on each prediction day, we ran the whole clustering-training process 20 times and took an average of the outputs as our final prediction. Our multistep approach may introduce stochasticity in three different steps: (a) the clustering process, (b) the data augmentation process, and (c) the regression algorithm. To ensure robustness of our prediction results, the whole process (from clustering to out-of-sample prediction) on each prediction date was repeated at least 20 times and the ensemble (via an averaging approach) predictions were reported as the final prediction. We chose to use an empirical approach to explore whether the number of computational experiments was sufficient to lead to a stable performance. In order to achieve this, we conducted ensemble prediction experiments using realizations from 1 to 50 prediction efforts. We documented the performance of these ensemble predictions using root mean square error (RMSE) and correlation (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>, Table S1). The performance of the ARGONet + GLEAM method plateaued after about 10-15 realizations, as seen in this table. Therefore, we concluded that 20 realizations of our algorithm was an adequate number to ensure robustness and stability of the prediction while not imposing too much computational burden.</p>
        </sec>
        <sec>
          <title>Predictive Model</title>
          <p>For our prediction task, we fitted a LASSO (least absolute shrinkage and selection operator) multivariable regularized linear model for every data set generated from our clustering and augmentation steps at time t.</p>
          <p>The LASSO technique minimizes the mean squared error between observations and predictions subject to a L1 norm constraint (more details of this method are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The number of new confirmed COVID-19 cases for the next bi-day can be then expressed as:</p>
          <p>
            <graphic xlink:href="jmir_v22i8e20285_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </p>
          <p>where <italic>y<sub>T + δt</sub></italic> is the estimate at date <italic>T + δt</italic>; <italic>δt</italic> = 2 days; <italic>y<sub>T</sub></italic> is the number of cases at date <italic>T</italic>; <italic>S<sub>T</sub></italic> is the search volume at date <italic>T</italic>; <italic>M<sub>T</sub></italic> is the number of media articles at date <italic>T</italic>; <italic>D<sub>T</sub></italic> is the number of deaths at date <italic>T</italic>; <italic>C<sub>T</sub></italic> is the number of cumulative cases at date <italic>T</italic>; and <italic>ϵ<sub>T + δt</sub></italic> is the normally distributed error term.</p>
          <p>Models were dynamically recalibrated, similar to the method presented by Santillana et al [<xref ref-type="bibr" rid="ref44">44</xref>] and Lu et al [<xref ref-type="bibr" rid="ref11">11</xref>]. Our method, ARGONet + GLEAM, was implemented in an R 3.5.3 environment with a glmnet 3.0-2 library.</p>
          <p>A summary of our method can be seen in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Summary of the methods used to obtain our coronavirus disease (COVID-19) estimates. GLEAM: global epidemic and mobility.</p>
            </caption>
            <graphic xlink:href="jmir_v22i8e20285_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Performance of Model and Relevance of Predictors</title>
          <p>Two different metrics were used to measure the performance of ARGONet + GLEAM: (1) the RMSE and (2) the Pearson correlation. To assess the predictive power of our methodology, we compared our performance against the following models:</p>
          <list list-type="order">
            <list-item>
              <p>Persistence rule (baseline): a rule-based model that uses the new case count at date <italic>T</italic> as an estimate of the prediction for <italic>T + δt</italic> so that <italic>y<sub>T + δt</sub></italic> = <italic>y<sub>T</sub></italic></p>
            </list-item>
            <list-item>
              <p>Autoregressive (AR): a simple AR model built on COVID-19 cases that occurred in the previous three AR lags (2-day reports) (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for more information on this model)</p>
            </list-item>
            <list-item>
              <p>ARGONet: an alternate version of our methodology that does not include any mechanistic information but includes the clustering and data augmentation approaches.</p>
            </list-item>
          </list>
          <p>As linear models are used in this study, the relevance of predictors in predicting new cases can be defined thanks to the associated factor of each term in the trained model. As all data were normalized using the z-score (strictly within the training data sets) during training and prediction, the associated factor can be approximately understood as how many standard deviations the predicted new cases <italic>y<sub>T + δt</sub></italic> will change if the predictor changes by 1 standard deviation.</p>
        </sec>
      </sec>
      <sec>
        <title>Data Sharing</title>
        <p>All code and data will be made available via the Harvard Dataverse.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We produced 2-day-ahead (strictly out-of-sample) and real-time COVID-19 forecasts for 32 Chinese provinces for the time period spanning February 3, 2020, to February 21, 2020. A visual representation of our out-of-sample model forecasts is shown in <xref rid="figure3" ref-type="fig">Figure 3</xref> along with the subsequently observed COVID-19 cases, as reported by China CDC.</p>
      <p>Our results show that ARGONet + GLEAM outperforms the persistence model in 27 out of 32 Chinese provinces. Even in provinces where ARGONet + GLEAM failed to produce improvements to the baseline model, our model produced reasonable disease estimates as seen in <xref rid="figure3" ref-type="fig">Figure 3</xref>. These provinces include Shanxi, Liaoning, Taiwan, Hong Kong, and Guangxi (the latter three with very different administration, and likely health care, systems compared to the rest of the provinces).</p>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>Graphical visualization of the estimates obtained by ARGONet + GLEAM. The number of new confirmed cases for coronavirus disease (COVID-19), as reported by China CDC (solid black), along with ARGONet + GLEAM (solid red) 2-day-ahead estimates between February 3, 2020, and February 21, 2020. As a comparison, the dotted blue line represents the persistence model.</p>
        </caption>
        <graphic xlink:href="jmir_v22i8e20285_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <sec>
        <title>Experimental Design AR Model</title>
        <p>We analyzed the performance of models built using only local, province-level epidemiological data as input. We generated an AR model for each province, built on COVID-19 cases that occurred in the previous three AR lags (ie, the previous three 2-day reports), and compared our estimates with the baseline. Our results, presented in <xref rid="figure4" ref-type="fig">Figure 4</xref> (also see Tables S2 and S3 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for a detailed description of our model results), labeled AR, show that the AR model’s predictive power was overall inferior to baseline performance, with the exception of Jilin, Tianjin, Hebei, Hubei, and Heilongjiang. Subsequently, we incorporated local disease-related internet search information from Baidu and news alert data from Media Cloud as inputs to build ARGO-type models [<xref ref-type="bibr" rid="ref9">9</xref>]. These ARGO-type models showed marginal predictive power improvements when compared with AR models and only outperformed the baseline in seven provinces.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Graphical visualization of the models’ performances. Comparison of the improvement in terms of root mean square error (RMSE) (top) and Pearson correlation (bottom) for each model used in the study. To facilitate comparison between model scores in each province in terms of RMSE, we normalized the RMSE score of each model by the baseline’s RMSE and visualized its inverse value. In this way, scores above one imply an improvement (RMSE reduction), whereas a score below one implies the model had a bigger RMSE in comparison to the baseline. In the case of correlation, we plotted the difference between the absolute values of each model’s correlation and the baseline’s correlation. Each panel is ordered, from left to right, based on the metric performance of ARGONet + GLEAM (solid red). AR: autoregressive.</p>
          </caption>
          <graphic xlink:href="jmir_v22i8e20285_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Dynamic Clustering of Chinese Provinces</title>
        <p>Based on prior work on influenza activity prediction [<xref ref-type="bibr" rid="ref11">11</xref>], we added historical COVID-19 activity information for all Chinese provinces to the input of our local models. We calculated the pairwise correlation matrix for confirmed COVID-19 cases between all Chinese provinces, between February 1 and February 21, 2020 (<xref rid="figure5" ref-type="fig">Figure 5</xref>). Our results showed that most of the provinces experienced similar epidemic trends. To build our (clustered) predictive models, we combined the data available from several provinces with similar trends (in terms of correlation, which was strictly calculated within our training period at the time-step of prediction). The clustering modeling approach, which incorporated internet-based data sources as the ARGO-type models, produced forecasts that led to error reductions for 17 out of 32 provinces compared to the persistence model and improved correlation values in 20 out of 32 provinces.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Visualization of the pairwise correlation matrices of confirmed cases and human mobility from Wuhan to each Chinese province. During the period of January, we can see a similar trend of mobility for a big cluster of provinces as well as a similar trend of number of confirmed cases for the period of February.</p>
          </caption>
          <graphic xlink:href="jmir_v22i8e20285_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Augmentation</title>
        <p>As an additional way to increase the number of observations in the training set of each cluster, we implemented a data augmentation technique. This process consisted of generating new observations via a bootstrap method and the addition of random Gaussian noise <inline-graphic xlink:href="jmir_v22i8e20285_fig10.png" mimetype="image" xlink:type="simple"/> to every randomly selected observation.</p>
      </sec>
      <sec>
        <title>ARGONet Model</title>
        <p>The results of incorporating both clustering and augmentation techniques can be seen in <xref rid="figure4" ref-type="fig">Figure 4</xref> and a visualization of the errors can be seen in <xref rid="figure6" ref-type="fig">Figure 6</xref>. For simplicity, we labeled these predictions ARGONet, even though this implementation of ARGONet is an enhanced version specifically designed for emerging outbreaks where data are scarce. In terms of RMSE, our results show that ARGONet’s predictive power was able to outperform AR and the persistence model in 25 of the 32 Chinese provinces. In terms of correlation, ARGONet outperformed the baseline (persistence) model in 18 provinces.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Visualization of the errors. Graphical visualization of the out-of-sample coronavirus disease (COVID-19) error (ŷ–y) between February 3, 2020, and February 21, 2020.</p>
          </caption>
          <graphic xlink:href="jmir_v22i8e20285_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>ARGONet + GLEAM Model</title>
        <p>We included forecasts produced by a mechanistic model as an additional input in our models (prior to the clustering and augmentation steps). The results of incorporating these estimates can be seen in <xref rid="figure4" ref-type="fig">Figure 4</xref> with the name of ARGONet + GLEAM and a visualization of the errors can be seen in <xref rid="figure6" ref-type="fig">Figure 6</xref>. Our results show that the inclusion of mechanistic model estimates improved ARGONet’s predictive power across most provinces. ARGONet + GLEAM led to error reductions in 27 out of 32 provinces compared to the baseline. In terms of correlation, it improved in 26 out of 32 provinces. Provinces like Qinghai, Hunan, and Jiangxi showed the biggest improvement, whereas Taiwan, Hong Kong, Shanxi, and Liaoning did not display error reductions.</p>
      </sec>
      <sec>
        <title>Visualization of the Results</title>
        <p>As an alternative way to visualize ARGONet + GLEAM’s predictive performance, we plotted a map with Chinese provinces (<xref rid="figure7" ref-type="fig">Figure 7</xref>), color coded based on the improvement shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. From a geographical perspective, the provinces where ARGONet + GLEAM had the most improvement (Anhui, Jiangxi, Fujian, Sichuan, and Guangdong) were located in south central China. Shanxi, Liaoning, Taiwan, Hong Kong, and Guangxi are the provinces where our models were not able to reduce the error compared to the baseline. While ARGONet + GLEAM’s performance in these provinces was not superior to the baseline, its predictions were within a reasonable range, as seen in <xref rid="figure2" ref-type="fig">Figures 2</xref> and <xref rid="figure3" ref-type="fig">3</xref>. We were not able to perform any analysis on Tibet, one of the largest provinces in China, and Macau given their low count of detected COVID-19 cases.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Geographical visualization of the relative improvement of ARGONet + GLEAM compared to the baseline. Chinese provinces that show an increase in performance relative to the baseline are shaded green, while provinces that did not perform better than our baseline are shaded purple. Provinces with the highest improvement (Anhui, Shanghai, Sichuan, Fujian, Jiangxi, Guangdong, and Qinghai) and underperformance (Taiwan, Shanxi, Liaoning, Hong Kong, and Guangxi) are identified by a red dot over the province.</p>
          </caption>
          <graphic xlink:href="jmir_v22i8e20285_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Analysis of the Importance of the Sources of Information Over Time</title>
        <p>To minimize the prediction errors in our estimates, the dynamic design of our methodology utilizes different sources of information as needed over time. This means that for each province (or group of provinces within a cluster), we can quantify the predictive power of different features used in our models as time evolves. Our analysis, visualized in <xref rid="figure8" ref-type="fig">Figure 8</xref>, shows that historical COVID-19 confirmed cases and suspected cases were consistently relevant sources of information over most of the study period. Internet-based search terms from Baidu were also frequently used. Daily news counts were used by our models in a selected number of provinces. However, for many of these provinces, the importance of media article counts decreased over time. Estimates from mechanistic models contributed to our model prediction, especially in early February 2020.</p>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Graphical visualization of the relevance of data sources. Time evolution of the value (averaged over the 20 experiments) of the linear coefficients for the features used in our methodology, visualized per province. Every heatmap includes the same number of features (rows) and is organized in the same order.</p>
          </caption>
          <graphic xlink:href="jmir_v22i8e20285_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We presented a methodology capable of producing meaningful and reliable short-term (2 days ahead) forecasts of COVID-19 activity, at the province level in China, by combining information from reports from China CDC, internet search trends, news article trends, and information from mechanistic models. Our approach is capable of overcoming multiple challenges characteristic of emerging outbreaks caused by novel pathogens. These challenges include the lack of historical disease activity information to calibrate models, the low volume of case count data, and the inherent delay in gaining access to data. Methodologically speaking, our method maximizes the use of a limited number of observations as the outbreak unfolded by (a) choosing an appropriate aggregation time-window (2 days) to improve the signal-to-noise ratio, (b) leveraging synchronicities in the spatiotemporal trends in COVID-19 across provinces to produce cluster-specific models of prediction, and (c) using data augmentation methods to increase stability in the training of our models.</p>
        <p>Previous methods, such as the ARGONet model [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref45">45</xref>], have been shown to make accurate real-time predictions at the state level in the United States for seasonal infectious diseases such as influenza. In addition, Chinazzi et al [<xref ref-type="bibr" rid="ref16">16</xref>] showed that it was possible to estimate the evolution of an emerging outbreak using a mechanistic model. Nevertheless, as far as we know, reliable real-time methodologies to forecast new case counts for an emerging disease outbreak remained an unsolved problem. In this study, we showed that a dynamically trained machine learning model can accurately produce real-time estimates for COVID-19 outbreaks.</p>
        <p>In terms of prediction error, our proposed methodology, ARGONet + GLEAM, was able to outperform the persistence model in 27 out of 32 provinces. While our method does not show prediction error improvements in Guangxi, Liaoning, Shanxi, Taiwan, and Hong Kong, our forecasts are still within range in all provinces except for Taiwan, where very few cases were reported during the time period of this study. It is important to note that Taiwan, Hong Kong, and Guangxi have different administrative (and likely health care) systems compared with the other provinces. This could explain the differences in COVID-19 trends in these regions and could help explain why our models do not seem to add value to the persistence model. Future studies should investigate if incorporating disease activity estimates from other mechanistic models, likely designed with different assumptions and mathematical formulations, could lead to further improvements.</p>
        <p>We were unable to identify an accurate (daily) parametrization of changes in human mobility due to the widespread local lockdowns during the period of our study (February 3-21, 2020), and thus, we did not include this data source as a potential predictor. Future studies may incorporate (high temporal resolution) human mobility data as a modulator of transmission and predictor of disease activity. When looking at the entire time period of this study, however, we observed that the data-driven clustering of provinces used in our approach and based on COVID-19 activity appears to have similarities with the clustering one would obtain from using human mobility data made available by Baidu (<xref rid="figure5" ref-type="fig">Figure 5</xref>). This result aligns with the conclusion of other available studies that found that the time evolution of the COVID-19 outbreak in China was significantly influenced by changes in human mobility (consequence of public health interventions) [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref46">46</xref>], and associated with the percentage of people traveling from Wuhan in the early stages.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>One limitation of our study is that during the test time period of our methods a consistent decrease in COVID-19 cases (due to strong public health interventions) was observed and thus our methods could not be tested for their ability to identify the epidemic peaks across provinces. The brevity of the COVID-19 epidemic outbreak in Chinese provinces was the limiting factor for this as the observations that corresponded to the growth phase of the outbreak were used for training purposes. Future model implementations in other locations where the growth phase has spanned longer time periods, like New York, United States, should investigate the ability of our models to properly identify peaks.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Our findings suggest that it is possible to use very limited amounts of data from multiple data sources to conduct real-time forecasting in the early stage of an emerging outbreak. We believe that our method, ARGONet + GLEAM, could prove to be useful for public health officials to monitor (and perhaps prevent) the spread of the virus [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. As the SARS-CoV-2 virus continues to spread around the world, extensions of our methods could be implemented to provide timely and reliable disease activity estimates to decision makers.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Mobility data correlation.</p>
        <media xlink:href="jmir_v22i8e20285_app1.docx" xlink:title="DOCX File , 18 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Supplementary tables.</p>
        <media xlink:href="jmir_v22i8e20285_app2.docx" xlink:title="DOCX File , 24 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AR</term>
          <def>
            <p>autoregressive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">China CDC</term>
          <def>
            <p>Chinese Center for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">COVID-19</term>
          <def>
            <p>coronavirus disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">GLEAM</term>
          <def>
            <p>global epidemic and mobility</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">I</term>
          <def>
            <p>infectious</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">L</term>
          <def>
            <p>latent</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LASSO</term>
          <def>
            <p>least absolute shrinkage and selection operator</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MIDAS</term>
          <def>
            <p>Models of Infectious Disease Agent Study</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">PHEIC</term>
          <def>
            <p>Public Health Emergency of International Concern</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">R</term>
          <def>
            <p>removed</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">RMSE</term>
          <def>
            <p>root mean square error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">S</term>
          <def>
            <p>susceptible</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">SARS-CoV-2</term>
          <def>
            <p>severe acute respiratory syndrome coronavirus 2</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We thank Dr Wei Luo for his assistance and guidance on the interpretation of mobility data for Chinese provinces. MC and AV acknowledge support from Google Cloud Healthcare and Life Sciences Solutions via the GCP research credits program.</p>
      <p>CP, AV, and MS were partially supported by the National Institute of General Medical Sciences of the National Institutes of Health under Award Number R01GM130668. MC and AV report grants from Metabiota Inc, outside the submitted work. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>DL, LC, CP, AV, and MS conceived and designed the study. DL, LC, CP, XD, and MC collected the different data sources. MC, JD, and AV produced predictions using the GLEAM modeling platform. DL, LC, and CP implemented the ARGONet + GLEAM methodology. DL, LC, CP, and MS analyzed the results. DL, LC, CP, and MS wrote the first draft of the manuscript. All authors contributed to and approved the final version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>GF</given-names>
            </name>
            <name name-style="western">
              <surname>Cowling</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Early Transmission Dynamics in Wuhan, China, of Novel Coronavirus–Infected Pneumonia</article-title>
          <source>N Engl J Med</source>
          <year>2020</year>
          <month>03</month>
          <day>26</day>
          <volume>382</volume>
          <issue>13</issue>
          <fpage>1199</fpage>
          <lpage>1207</lpage>
          <pub-id pub-id-type="doi">10.1056/nejmoa2001316</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>GF</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>A Novel Coronavirus from Patients with Pneumonia in China, 2019</article-title>
          <source>N Engl J Med</source>
          <year>2020</year>
          <month>02</month>
          <day>20</day>
          <volume>382</volume>
          <issue>8</issue>
          <fpage>727</fpage>
          <lpage>733</lpage>
          <pub-id pub-id-type="doi">10.1056/nejmoa2001017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kok</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>To</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yip</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Tsoi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ip</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Yuen</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A familial cluster of pneumonia associated with the 2019 novel coronavirus indicating person-to-person transmission: a study of a family cluster</article-title>
          <source>The Lancet</source>
          <year>2020</year>
          <month>02</month>
          <volume>395</volume>
          <issue>10223</issue>
          <fpage>514</fpage>
          <lpage>523</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(20)30154-9/fulltext"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(20)30154-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pullano</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pinotti</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Valdano</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Poletto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Boelle</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Preparedness and vulnerability of African countries against introductions of 2019-nCoV</article-title>
          <source>medRxiv</source>
          <year>2020</year>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/2020.02.05.20020792v1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2020.02.05.20020792</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <article-title>Statement on the second meeting of the International Health Regulations (2005) Emergency Committee regarding the outbreak of novel coronavirus (2019-nCoV)</article-title>
          <source>World Health Organization</source>
          <year>2020</year>
          <access-date>2020-02-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/news-room/detail/30-01-2020-statement-on-the-second-meeting-of-the-international-health-regulations-(2005)-emergency-committee-regarding-the-outbreak-of-novel-coronavirus-(2019-ncov)">https://www.who.int/news-room/detail/30-01-2020-statement-on-the-second-meeting-of-the-international-health-regulations-(2005)-emergency-committee-regarding-the-outbreak-of-novel-coronavirus-(2019-ncov)</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Du</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cauchemez</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cowling</surname>
              <given-names>BJ</given-names>
            </name>
          </person-group>
          <article-title>Risk of 2019 novel coronavirus importations throughout China prior to the Wuhan quarantine</article-title>
          <source>medRxiv</source>
          <year>2020</year>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/2020.01.28.20019299v4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2020.01.28.20019299</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Viboud</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Early epidemiological analysis of the 2019-nCoV outbreak based on a crowdsourced data</article-title>
          <source>medRxiv</source>
          <year>2020</year>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/2020.01.31.20019935v1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2020.01.31.20019935</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Horby</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Hayden</surname>
              <given-names>FG</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>GF</given-names>
            </name>
          </person-group>
          <article-title>A novel coronavirus outbreak of global health concern</article-title>
          <source>The Lancet</source>
          <year>2020</year>
          <month>02</month>
          <volume>395</volume>
          <issue>10223</issue>
          <fpage>470</fpage>
          <lpage>473</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(20)30185-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kou</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Accurate estimation of influenza epidemics using Google search data via ARGO</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2015</year>
          <month>11</month>
          <day>24</day>
          <volume>112</volume>
          <issue>47</issue>
          <fpage>14473</fpage>
          <lpage>14478</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.pnas.org/cgi/pmidlookup?view=long&#38;pmid=26553980"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1515373112</pub-id>
          <pub-id pub-id-type="medline">26553980</pub-id>
          <pub-id pub-id-type="pii">1515373112</pub-id>
          <pub-id pub-id-type="pmcid">PMC4664296</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Combining Search, Social Media, and Traditional Data Sources to Improve Influenza Surveillance</article-title>
          <source>PLoS Comput Biol</source>
          <year>2015</year>
          <month>10</month>
          <day>29</day>
          <volume>11</volume>
          <issue>10</issue>
          <fpage>e1004513</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pcbi.1004513"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1004513</pub-id>
          <pub-id pub-id-type="medline">26513245</pub-id>
          <pub-id pub-id-type="pii">PCOMPBIOL-D-15-00856</pub-id>
          <pub-id pub-id-type="pmcid">PMC4626021</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Hattab</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Clemente</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Biggerstaff</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Improved state-level influenza nowcasting in the United States leveraging Internet-based data and network approaches</article-title>
          <source>Nat Commun</source>
          <year>2019</year>
          <month>01</month>
          <day>11</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>147</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30635558"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-018-08082-0</pub-id>
          <pub-id pub-id-type="medline">30635558</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-018-08082-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6329822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cleaton</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Viboud</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Simonsen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hurtado</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Chowell</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Characterizing Ebola Transmission Patterns Based on Internet News Reports</article-title>
          <source>Clin Infect Dis</source>
          <year>2016</year>
          <month>01</month>
          <day>01</day>
          <volume>62</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26338786"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/cid/civ748</pub-id>
          <pub-id pub-id-type="medline">26338786</pub-id>
          <pub-id pub-id-type="pii">civ748</pub-id>
          <pub-id pub-id-type="pmcid">PMC4678106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Retrospective analysis of the possibility of predicting the COVID-19 outbreak from Internet searches and social media data, China, 2020</article-title>
          <source>Eurosurveillance</source>
          <year>2020</year>
          <volume>25</volume>
          <issue>10</issue>
          <fpage>2000199</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.eurosurveillance.org/content/10.2807/1560-7917.ES.2020.25.10.2000199#html_fulltext"/>
          </comment>
          <pub-id pub-id-type="doi">10.2807/1560-7917.es.2020.25.10.2000199</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipsitch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Enhancing Situational Awareness to Prevent Infectious Disease Outbreaks from Becoming Catastrophic</article-title>
          <source>Curr Top Microbiol Immunol</source>
          <year>2019</year>
          <volume>424</volume>
          <fpage>59</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="doi">10.1007/82_2019_172</pub-id>
          <pub-id pub-id-type="medline">31292726</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Shaman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Substantial undocumented infection facilitates the rapid dissemination of novel coronavirus (SARS-CoV-2)</article-title>
          <source>Science</source>
          <year>2020</year>
          <month>05</month>
          <day>01</day>
          <volume>368</volume>
          <issue>6490</issue>
          <fpage>489</fpage>
          <lpage>493</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32179701"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/science.abb3221</pub-id>
          <pub-id pub-id-type="medline">32179701</pub-id>
          <pub-id pub-id-type="pii">science.abb3221</pub-id>
          <pub-id pub-id-type="pmcid">PMC7164387</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chinazzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Ajelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gioannini</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Litvinova</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Merler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pastore Y Piontti</surname>
              <given-names>Ana</given-names>
            </name>
            <name name-style="western">
              <surname>Mu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Viboud</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Halloran</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Longini</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The effect of travel restrictions on the spread of the 2019 novel coronavirus (COVID-19) outbreak</article-title>
          <source>Science</source>
          <year>2020</year>
          <month>04</month>
          <day>24</day>
          <volume>368</volume>
          <issue>6489</issue>
          <fpage>395</fpage>
          <lpage>400</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32144116"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/science.aba9757</pub-id>
          <pub-id pub-id-type="medline">32144116</pub-id>
          <pub-id pub-id-type="pii">science.aba9757</pub-id>
          <pub-id pub-id-type="pmcid">PMC7164386</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ruktanonchai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Prosper</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Floyd</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Effect of non-pharmaceutical interventions for containing the COVID-19 outbreak in China</article-title>
          <source>medRxiv</source>
          <year>2020</year>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/2020.03.03.20029843v3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2020.03.03.20029843</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Akhmetzhanov</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hayashi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Linton</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kobayashi</surname>
              <given-names>Tetsuro</given-names>
            </name>
            <name name-style="western">
              <surname>Kinoshita</surname>
              <given-names>Ryo</given-names>
            </name>
            <name name-style="western">
              <surname>Nishiura</surname>
              <given-names>Hiroshi</given-names>
            </name>
          </person-group>
          <article-title>Real-Time Estimation of the Risk of Death from Novel Coronavirus (COVID-19) Infection: Inference Using Exported Cases</article-title>
          <source>J Clin Med</source>
          <year>2020</year>
          <month>02</month>
          <day>14</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>523</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jcm9020523"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jcm9020523</pub-id>
          <pub-id pub-id-type="medline">32075152</pub-id>
          <pub-id pub-id-type="pii">jcm9020523</pub-id>
          <pub-id pub-id-type="pmcid">PMC7074479</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gore</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lynch</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Virginia County COVID-19 daily case total forecaster</article-title>
          <source>VMASC</source>
          <year>2020</year>
          <access-date>2020-07-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://vmasc.shinyapps.io/va-county-covid-forecast/">https://vmasc.shinyapps.io/va-county-covid-forecast/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eldabi</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Hybrid simulation: Historical lessons, present challenges and futures</article-title>
          <year>2016</year>
          <conf-name>WSC '16: Proceedings of the 2016 Winter Simulation Conference</conf-name>
          <conf-date>11-14 Dec 2016</conf-date>
          <conf-loc>Washington, DC, USA</conf-loc>
          <publisher-name>IEEE</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/7822192"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/WSC.2016.7822192</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Giabbanelli</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Solving challenges at the interface of simulation and big data using machine learning</article-title>
          <year>2019</year>
          <conf-name>2019 Winter Simulation Conference (WSC)</conf-name>
          <conf-date>8-11 Dec 2019</conf-date>
          <conf-loc>National Harbor, MD, USA</conf-loc>
          <publisher-name>IEEE</publisher-name>
          <fpage>572</fpage>
          <lpage>583</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/9004755"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/wsc40007.2019.9004755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aiken</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McGough</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Majumder</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wachtel</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Viboud</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Real-time Estimation of Disease Activity in Emerging Outbreaks using Internet Search Information</article-title>
          <source>medRxiv</source>
          <year>2019</year>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/19010470v1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/19010470</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McGough</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hawkins</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Forecasting Zika Incidence in the 2016 Latin America Outbreak Combining Traditional Disease Surveillance with Search, Social Media, and News Report Data</article-title>
          <source>PLoS Negl Trop Dis</source>
          <year>2017</year>
          <month>01</month>
          <day>13</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e0005295</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pntd.0005295"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pntd.0005295</pub-id>
          <pub-id pub-id-type="medline">28085877</pub-id>
          <pub-id pub-id-type="pii">PNTD-D-16-01733</pub-id>
          <pub-id pub-id-type="pmcid">PMC5268704</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Models of Infectious Disease Agent Study Association</collab>
          </person-group>
          <article-title>midas-network / COVID-19</article-title>
          <source>GitHub</source>
          <access-date>2020-08-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/midas-network/COVID-19/tree/master/data/cases">https://github.com/midas-network/COVID-19/tree/master/data/cases</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phan</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TV</given-names>
            </name>
            <name name-style="western">
              <surname>Luong</surname>
              <given-names>QC</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TV</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>HT</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>HQ</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>QD</given-names>
            </name>
          </person-group>
          <article-title>Importation and Human-to-Human Transmission of a Novel Coronavirus in Vietnam</article-title>
          <source>N Engl J Med</source>
          <year>2020</year>
          <month>02</month>
          <day>27</day>
          <volume>382</volume>
          <issue>9</issue>
          <fpage>872</fpage>
          <lpage>874</lpage>
          <pub-id pub-id-type="doi">10.1056/nejmc2001272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Balcan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>Bruno</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ramasco</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Colizza</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Modeling the spatial spread of infectious diseases: the GLobal Epidemic and Mobility computational model</article-title>
          <source>J Comput Sci</source>
          <year>2010</year>
          <month>08</month>
          <day>01</day>
          <volume>1</volume>
          <issue>3</issue>
          <fpage>132</fpage>
          <lpage>145</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21415939"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jocs.2010.07.002</pub-id>
          <pub-id pub-id-type="medline">21415939</pub-id>
          <pub-id pub-id-type="pmcid">PMC3056392</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gomes</surname>
              <given-names>MFC</given-names>
            </name>
            <name name-style="western">
              <surname>Pastore Y Piontti</surname>
              <given-names>Ana</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Longini</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Halloran</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Assessing the international spreading risk associated with the 2014 West African Ebola outbreak</article-title>
          <source>PLoS Curr</source>
          <year>2014</year>
          <month>09</month>
          <day>02</day>
          <volume>6</volume>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1371/currents.outbreaks.cd818f63d40e24aef769dda7df9e0da5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/currents.outbreaks.cd818f63d40e24aef769dda7df9e0da5</pub-id>
          <pub-id pub-id-type="medline">25642360</pub-id>
          <pub-id pub-id-type="pmcid">PMC4169359</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chinazzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pastore Y Piontti</surname>
              <given-names>Ana</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Rojas</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Merler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mistry</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Poletti</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Halloran</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Longini</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Spread of Zika virus in the Americas</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2017</year>
          <month>05</month>
          <day>30</day>
          <volume>114</volume>
          <issue>22</issue>
          <fpage>E4334</fpage>
          <lpage>E4343</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.pnas.org/cgi/pmidlookup?view=long&#38;pmid=28442561"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1620161114</pub-id>
          <pub-id pub-id-type="medline">28442561</pub-id>
          <pub-id pub-id-type="pii">1620161114</pub-id>
          <pub-id pub-id-type="pmcid">PMC5465916</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rambaut</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Phylogenetic analysis of 23 nCoV-2019 genomes, 2020-01-23</article-title>
          <source>Virological.org</source>
          <year>2020</year>
          <access-date>2020-02-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://virological.org/t/phylogenetic-analysis-of-23-ncov-2019-genomes-2020-01-23/335">https://virological.org/t/phylogenetic-analysis-of-23-ncov-2019-genomes-2020-01-23/335</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cori</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dorigatti</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Baguelin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Donnelly</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ferguson</surname>
              <given-names>NM</given-names>
            </name>
          </person-group>
          <article-title>Report 3: Transmissibility of 2019-nCoV</article-title>
          <source>Imperial College London COVID-19 Response Team</source>
          <year>2020</year>
          <month>01</month>
          <access-date>2020-02-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.imperial.ac.uk/mrc-global-infectious-disease-analysis/news--wuhan-coronavirus">https://www.imperial.ac.uk/mrc-global-infectious-disease-analysis/news--wuhan-coronavirus</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Estimates of the clock and TMRCA for 2019-nCoV based on 27 genomes</article-title>
          <source>Virological.org</source>
          <year>2020</year>
          <access-date>2020-02-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://virological.org/t/clock-and-tmrca-based-on-27-genomes/347">http://virological.org/t/clock-and-tmrca-based-on-27-genomes/347</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bedford</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Neher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hadfield</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hodcroft</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ilcisin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Muller</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Genomic analysis of nCoV spread. Situation report 2020-01-23</article-title>
          <source>Nextstrain</source>
          <year>2020</year>
          <access-date>2020-02-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://nextstrain.org/narratives/ncov/sit-rep/2020-01-23">https://nextstrain.org/narratives/ncov/sit-rep/2020-01-23</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sunnåker</surname>
              <given-names>Mikael</given-names>
            </name>
            <name name-style="western">
              <surname>Busetto</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Numminen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Corander</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Foll</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dessimoz</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Approximate Bayesian computation</article-title>
          <source>PLoS Comput Biol</source>
          <year>2013</year>
          <month>01</month>
          <day>10</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e1002803</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pcbi.1002803"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002803</pub-id>
          <pub-id pub-id-type="medline">23341757</pub-id>
          <pub-id pub-id-type="pii">PCOMPBIOL-D-12-01664</pub-id>
          <pub-id pub-id-type="pmcid">PMC3547661</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Niehus</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>De Salazar</surname>
              <given-names>Pablo M</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lipsitch</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Quantifying bias of COVID-19 prevalence and severity estimates in Wuhan, China that depend on reported cases in international travelers</article-title>
          <source>medRxiv</source>
          <year>2020</year>
          <month>02</month>
          <day>18</day>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1101/2020.02.13.20022707"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2020.02.13.20022707</pub-id>
          <pub-id pub-id-type="medline">32511442</pub-id>
          <pub-id pub-id-type="pmcid">PMC7239063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Salazar</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Niehus</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Buckee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lipsitch</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Using predicted imports of 2019-nCoV cases to determine locations that may not be identifying all imported cases</article-title>
          <source>medRxiv</source>
          <year>2020</year>
          <month>02</month>
          <day>11</day>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1101/2020.02.04.20020495"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2020.02.04.20020495</pub-id>
          <pub-id pub-id-type="medline">32511458</pub-id>
          <pub-id pub-id-type="pmcid">PMC7239086</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Viboud</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Early epidemiological analysis of the coronavirus disease 2019 outbreak based on crowdsourced data: a population-level observational study</article-title>
          <source>The Lancet Digital Health</source>
          <year>2020</year>
          <month>04</month>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>e201</fpage>
          <lpage>e208</lpage>
          <pub-id pub-id-type="doi">10.1016/s2589-7500(20)30026-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pinotti</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Di Domenico</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ortega</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mancastroppa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pullano</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Valdano</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Lessons learnt from 288 COVID-19 international cases: importations over time, effect of interventions, underdetection of imported cases</article-title>
          <source>medRxiv</source>
          <year>2020</year>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/2020.02.24.20027326v1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2020.02.24.20027326</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Defays</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>An efficient algorithm for a complete link method</article-title>
          <source>The Computer Journal</source>
          <year>1977</year>
          <month>04</month>
          <day>01</day>
          <volume>20</volume>
          <issue>4</issue>
          <fpage>364</fpage>
          <lpage>366</lpage>
          <pub-id pub-id-type="doi">10.1093/comjnl/20.4.364</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Calinski</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Harabasz</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A dendrite method for cluster analysis</article-title>
          <source>Communications in Statistics</source>
          <year>1974</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1080/03610927408827101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Henelius</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Puolamäki</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Boström</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Papapetrou</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Clustering with Confidence: Finding Clusters with Statistical Guarantees</article-title>
          <source>arXiv</source>
          <year>2016</year>
          <fpage>e</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1612.08714"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sasirekha</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Baby</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Agglomerative Hierarchical Clustering Algorithm - A Review</article-title>
          <source>International Journal of Scientific and Research Publications</source>
          <year>2013</year>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/bd9f/e11a960001cb845ea74a75cf8c10b8c34615.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/978-1-4419-9863-7_100033</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seal</surname>
              <given-names>HL</given-names>
            </name>
          </person-group>
          <article-title>Studies in the History of Probability and Statistics. XV The historical development of the Gauss linear model</article-title>
          <source>Biometrika</source>
          <year>1967</year>
          <volume>54</volume>
          <issue>1-2</issue>
          <fpage>1</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1093/biomet/54.1-2.1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Regression Shrinkage and Selection Via the Lasso</article-title>
          <source>Journal of the Royal Statistical Society: Series B (Methodological)</source>
          <year>1996</year>
          <volume>58</volume>
          <issue>1</issue>
          <fpage>267</fpage>
          <lpage>288</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Santillana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mekaru</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scales</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Using clinicians' search query data to monitor influenza epidemics</article-title>
          <source>Clin Infect Dis</source>
          <year>2014</year>
          <month>11</month>
          <day>15</day>
          <volume>59</volume>
          <issue>10</issue>
          <fpage>1446</fpage>
          <lpage>1450</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25115873"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/cid/ciu647</pub-id>
          <pub-id pub-id-type="medline">25115873</pub-id>
          <pub-id pub-id-type="pii">ciu647</pub-id>
          <pub-id pub-id-type="pmcid">PMC4296132</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Poirier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hswen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bouzille</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cuggia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lavenu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Influenza forecasting for the French regions by using EHR web and climatic data sources with an ensemble approach ARGONet</article-title>
          <source>medRxiv</source>
          <year>2019</year>
          <fpage>19009795</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/19009795v1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/19009795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kraemer</surname>
              <given-names>MUG</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gutierrez</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pigott</surname>
              <given-names>DM</given-names>
            </name>
            <collab>Open COVID-19 Data Working Group</collab>
            <name name-style="western">
              <surname>du Plessis</surname>
              <given-names>Louis</given-names>
            </name>
            <name name-style="western">
              <surname>Faria</surname>
              <given-names>Nuno R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Ruoran</given-names>
            </name>
            <name name-style="western">
              <surname>Hanage</surname>
              <given-names>William P</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>John S</given-names>
            </name>
            <name name-style="western">
              <surname>Layan</surname>
              <given-names>Maylis</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>Alessandro</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Huaiyu</given-names>
            </name>
            <name name-style="western">
              <surname>Dye</surname>
              <given-names>Christopher</given-names>
            </name>
            <name name-style="western">
              <surname>Pybus</surname>
              <given-names>Oliver G</given-names>
            </name>
            <name name-style="western">
              <surname>Scarpino</surname>
              <given-names>Samuel V</given-names>
            </name>
          </person-group>
          <article-title>The effect of human mobility and control measures on the COVID-19 epidemic in China</article-title>
          <source>Science</source>
          <year>2020</year>
          <month>05</month>
          <day>01</day>
          <volume>368</volume>
          <issue>6490</issue>
          <fpage>493</fpage>
          <lpage>497</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32213647"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/science.abb4218</pub-id>
          <pub-id pub-id-type="pmid">32213647</pub-id>
          <pub-id pub-id-type="pii">science.abb4218</pub-id>
          <pub-id pub-id-type="pmcid">PMC7146642</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Nowcasting and forecasting the potential domestic and international spread of the 2019-nCoV outbreak originating in Wuhan, China: a modelling study</article-title>
          <source>The Lancet</source>
          <year>2020</year>
          <month>02</month>
          <volume>395</volume>
          <issue>10225</issue>
          <fpage>689</fpage>
          <lpage>697</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(20)30260-9</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
