<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
    <front>
        <journal-meta>
            <journal-id journal-id-type="publisher-id">JMIR</journal-id>
            <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
            <journal-title>Journal of Medical Internet Research</journal-title>
            <issn pub-type="epub">1438-8871</issn>
            <publisher>
                <publisher-name>JMIR Publications Inc.</publisher-name>
                <publisher-loc>Toronto, Canada</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="publisher-id">v16i6e154</article-id>
            <article-id pub-id-type="pmid">24943128</article-id>
            <article-id pub-id-type="doi">10.2196/jmir.3156</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Original Paper</subject>
                </subj-group>
                <subj-group subj-group-type="article-type">
                    <subject>Original Paper</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Detecting Disease Outbreaks in Mass Gatherings Using Internet Data</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="editor">
                    <name>
                        <surname>Eysenbach</surname>
                        <given-names>Gunther</given-names>
                    </name>
                </contrib>
            </contrib-group>
            <contrib-group>
                <contrib contrib-type="reviewer">
                    <name>
                        <surname>Sakaki</surname>
                        <given-names>Takeshi</given-names>
                    </name>
                </contrib>
                <contrib contrib-type="reviewer">
                    <name>
                        <surname>Jiang</surname>
                        <given-names>Jianmin</given-names>
                    </name>
                </contrib>
                <contrib contrib-type="reviewer">
                    <name>
                        <surname>Freifeld</surname>
                        <given-names>Clark</given-names>
                    </name>
                </contrib>
            </contrib-group>
            <contrib-group>
                <contrib contrib-type="author" id="contrib1">
                    <name name-style="western">
                        <surname>Yom-Tov</surname>
                        <given-names>Elad</given-names>
                    </name>
                    <degrees>BSc, MA, PhD</degrees>
                    <xref rid="aff1" ref-type="aff">1</xref>
                </contrib>
                <contrib contrib-type="author" id="contrib2" corresp="yes">
                    <name name-style="western">
                        <surname>Borsa</surname>
                        <given-names>Diana</given-names>
                    </name>
                    <degrees>BSc, BMath, MSc (Hons)</degrees>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <address>
                        <institution>Centre of Computational Statistics and Machine Learning (CSML)</institution>
                        <institution>Department of Computer Science</institution>
                        <institution>University College London, University of London</institution>
                        <addr-line>Malet Place</addr-line>
                        <addr-line>Gower St</addr-line>
                        <addr-line>London, WC1E 6BT</addr-line>
                        <country>United Kingdom</country>
                        <phone>44 20 7679</phone>
                        <fax>44 20 7387 1397</fax>
                        <email>d.borsa@cs.ucl.ac.uk</email>
                    </address>
                </contrib>
                <contrib contrib-type="author" id="contrib3">
                    <name name-style="western">
                        <surname>Cox</surname>
                        <given-names>Ingemar J</given-names>
                    </name>
                    <degrees>BSc, DPhil</degrees>
                    <xref rid="aff3" ref-type="aff">3</xref>
                    <xref rid="aff4" ref-type="aff">4</xref>
                </contrib>
                <contrib contrib-type="author" id="contrib4">
                    <name name-style="western">
                        <surname>McKendry</surname>
                        <given-names>Rachel A</given-names>
                    </name>
                    <degrees>BSc, PhD</degrees>
                    <xref rid="aff5" ref-type="aff">5</xref>
                </contrib>
            </contrib-group>
            <aff id="aff1">
                <sup>1</sup>
                <institution>Microsoft Research Israel</institution>
                <addr-line>Herzelia</addr-line>
                <country>Israel</country>
            </aff>
            <aff id="aff2">
                <sup>2</sup>
                <institution>Centre of Computational Statistics and Machine Learning (CSML)</institution>
                <institution>Department of Computer Science</institution>
                <institution>University College London, University of London</institution>
                <addr-line>London</addr-line>
                <country>United Kingdom</country>
            </aff>
            <aff id="aff3">
                <sup>3</sup>
                <institution>Copenhagen University</institution>
                <institution>Department of Computer Science</institution>
                <addr-line>Copenhagen</addr-line>
                <country>Denmark</country>
            </aff>
            <aff id="aff4">
                <sup>4</sup>
                <institution>University College London, University of London</institution>
                <institution>Department of Computer Science</institution>
                <addr-line>London</addr-line>
                <country>United Kingdom</country>
            </aff>
            <aff id="aff5">
                <sup>5</sup>
                <institution>University College London, University of London</institution>
                <institution>London Centre for Nanotechnology and Division of Medicine</institution>
                <addr-line>London</addr-line>
                <country>United Kingdom</country>
            </aff>
            <author-notes>
                <corresp>Corresponding Author: Diana Borsa <email>d.borsa@cs.ucl.ac.uk</email>
                </corresp>
            </author-notes>
            <pub-date pub-type="collection">
                <month>06</month>
                <year>2014</year>
            </pub-date>
            <pub-date pub-type="epub">
                <day>18</day>
                <month>06</month>
                <year>2014</year>
            </pub-date>
            <volume>16</volume>
            <issue>6</issue>
            <elocation-id>e154</elocation-id>
            <!--history from ojs - api-xml-->
            <history>
                <date date-type="received">
                    <day>02</day>
                    <month>12</month>
                    <year>2013</year>
                </date>
                <date date-type="rev-request">
                    <day>14</day>
                    <month>03</month>
                    <year>2014</year>
                </date>
                <date date-type="rev-recd">
                    <day>14</day>
                    <month>04</month>
                    <year>2014</year>
                </date>
                <date date-type="accepted">
                    <day>18</day>
                    <month>05</month>
                    <year>2014</year>
                </date>
            </history>
            <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
            <copyright-statement>&#169;Elad Yom-Tov, Diana Borsa, Ingemar J Cox, Rachel A McKendry. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 18.06.2014. </copyright-statement>
            <copyright-year>2014</copyright-year>
            <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/2.0/">
                <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
            </license>
            <self-uri xlink:href="http://www.jmir.org/2014/6/e154/" xlink:type="simple" />
            <abstract>
                <sec sec-type="background">
                    <title>Background</title>
                    <p>Mass gatherings, such as music festivals and religious events, pose a health care challenge because of the risk of transmission of communicable diseases. This is exacerbated by the fact that participants disperse soon after the gathering, potentially spreading disease within their communities. The dispersion of participants also poses a challenge for traditional surveillance methods. The ubiquitous use of the Internet may enable the detection of disease outbreaks through analysis of data generated by users during events and shortly thereafter.</p>
                </sec>
                <sec sec-type="objective">
                    <title>Objective</title>
                    <p>The intent of the study was to develop algorithms that can alert to possible outbreaks of communicable diseases from Internet data, specifically Twitter and search engine queries.</p>
                </sec>
                <sec sec-type="methods">
                    <title>Methods</title>
                    <p>We extracted all Twitter postings and queries made to the Bing search engine by users who repeatedly mentioned one of nine major music festivals held in the United Kingdom and one religious event (the Hajj in Mecca) during 2012, for a period of 30 days before and after each festival. We analyzed these data using three methods, two of which compared words associated with disease symptoms before and after the time of the festival, and one that compared the frequency of these words with those of other users in the United Kingdom in the days following the festivals.</p>
                </sec>
                <sec sec-type="results">
                    <title>Results</title>
                    <p>The data comprised, on average, 7.5 million tweets made by 12,163 users, and 32,143 queries made by 1756 users from each festival. Our methods indicated the statistically significant appearance of a disease symptom in two of the nine festivals. For example, cough was detected at higher than expected levels following the Wakestock festival. Statistically significant agreement (chi-square test, <italic>P</italic>&#60;.01) between methods and across data sources was found where a statistically significant symptom was detected. Anecdotal evidence suggests that symptoms detected are indeed indicative of a disease that some users attributed to being at the festival.</p>
                </sec>
                <sec sec-type="conclusions">
                    <title>Conclusions</title>
                    <p>Our work shows the feasibility of creating a public health surveillance system for mass gatherings based on Internet data. The use of multiple data sources and analysis methods was found to be advantageous for rejecting false positives. Further studies are required in order to validate our findings with data from public health authorities.</p>
                </sec>
            </abstract>
            <kwd-group>
                <kwd>mass gatherings</kwd>
                <kwd>infodemiology</kwd>
                <kwd>infectious disease</kwd>
                <kwd>information retrieval</kwd>
                <kwd>data mining</kwd>
            </kwd-group>
        </article-meta>
    </front>
    <body>
        <sec sec-type="introduction">
            <title>Introduction</title>
            <sec>
                <title>Background</title>
                <p>Historically, infectious diseases have devastated societies. Examples include the &#8220;Black Death&#8221; bubonic plague of the 14th century in which between 30% and 40% of Europe&#8217;s population is estimated to have died [<xref ref-type="bibr" rid="ref1">1</xref>], and the influenza epidemic of 1918-1920, in which as many as 50 million are estimated to have died [<xref ref-type="bibr" rid="ref2">2</xref>]. Despite very significant advances in medicine, infectious diseases remain potentially very serious threats to society. For example, a pandemic influenza is rated as the greatest national risk on the UK government risk register [<xref ref-type="bibr" rid="ref3">3</xref>]. An estimated 35.3 million people are HIV-infected [<xref ref-type="bibr" rid="ref4">4</xref>], drug-resistant Methicillin-resistant <italic>Staphylococcus aureus</italic> (MRSA) is a major public health concern [<xref ref-type="bibr" rid="ref5">5</xref>], about 2 million cases of cancer are caused by infections each year [<xref ref-type="bibr" rid="ref6">6</xref>], and infection is a major source of morbidity in primary care [<xref ref-type="bibr" rid="ref7">7</xref>]. Moreover, emerging new infections, such as H1N1 influenza, can cause pandemics, spreading rapidly and unpredictably. Early diagnostics play a crucial role in prevention, treatment, and care but most tests require samples to be sent to specialist laboratories leading to inherent delays between tests, results, and clinical interventions. Public health intervention may be further delayed by the time lag of 1-2 weeks associated with retrospective surveillance. There are increasing national and international drivers to dramatically improve our capacity to rapidly respond to infectious diseases by widening access to tests in community settings and drive innovative real-time surveillance.</p>
                <p>Protection against infectious diseases includes the development of new medicines, vaccination programs, improved hygiene, and promotion of behavioral modifications. While together these efforts may reduce the risk of infectious diseases, the risk cannot be eliminated. Consequently, infectious disease surveillance networks at national and international levels have been established. The purpose of public health surveillance networks is to provide &#8220;Ongoing systematic collection, analysis, interpretation and dissemination of data regarding a health-related event for use in public health action to reduce morbidity and mortality and to improve health&#8221; [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
                <p>The most reliable sources of data for public health surveillance networks are confirmed diagnoses of diseases. Unfortunately, confirming a diagnosis may take days or weeks, due to a variety of delays including (1) time to ship a patient sample to a testing laboratory, (2) time to perform the test, and (3) time to report the results.</p>
                <p>Delays in identifying the onset of an infectious epidemic result in delayed responses, which can significantly exacerbate the impact of the epidemic on a society. Consequently, there is strong interest in reducing delays. One way to accomplish this is through syndromic surveillance, which emphasizes &#8220;the use of near &#8216;real-time&#8217; data and automated tools to detect and characterize unusual activity for further public health investigation&#8221; [<xref ref-type="bibr" rid="ref9">9</xref>]. There is a range of pre-diagnostic data that can be and has been used, including clinical data such as nurse advice line activity, school nurse visits, poison control center data, EMS records, emergency department visits, outpatient records, laboratory/radiology orders and results, prescription medication sales, and electronic health records, and non-clinical data such as over-the-counter (OTC) medications, work and school absenteeism records, ambulance dispatch data, zoonotic surveillance data (eg, dead birds from West Nile virus activity), health-related Web searches, and other data from online social networks.</p>
                <p>The use of syndromic surveillance systems dates back to at least 1977, when Welliver et al [<xref ref-type="bibr" rid="ref10">10</xref>] reported the use of OTC medication sales in Los Angeles. The early 2000s saw renewed interest in syndromic surveillance as a result of a US Defense Advanced Research Projects Agency (DARPA) initiative called ENCOMPASS (ENhanced COnsequence Management Planning And Support System) to provide an early warning system to protect against bioterrorism. As early as 2001, it was suggested to use query logs associated with health care websites as one form of syndromic data [<xref ref-type="bibr" rid="ref11">11</xref>]. The advantage of online data sources is that the data collection is usually straightforward and very timely, that is, the lag between data creation, collection, and analysis can be very short (possibly seconds). We are therefore interested in online syndromic surveillance, which is discussed in more detail in the next section.</p>
                <p>The World Health Organization (WHO) states that &#8220;an organized or unplanned event can be classified as a mass gathering if the number of people attending is sufficient to strain the planning and response resources of the community, state, or nation hosting the event&#8221; [<xref ref-type="bibr" rid="ref12">12</xref>]. Examples of mass gatherings include very large religious gatherings such as the Hajj (approximately 2 million people) and the Hindu Kumbh Mela (estimated at 80-100 million people), large international sporting events such as the Olympics, and national music festivals such as Glastonbury in the United Kingdom. Mass gatherings have been sources for the spread of infectious diseases. The spread of cholera from a well in Mecca was documented as far back as 1883 [<xref ref-type="bibr" rid="ref13">13</xref>]. More recently, during the 1992 Glastonbury music festival attended by 70,000 people in the United Kingdom, 72 cases of <italic>Campylobacter</italic> infection were reported due to drinking unpasteurized milk [<xref ref-type="bibr" rid="ref14">14</xref>]. In 2009, an outbreak of H1N1 influenza was reported at the Rock Werchter festival in Belgium [<xref ref-type="bibr" rid="ref15">15</xref>]. Also in 2009, outbreaks of H1N1 influenza were reported at a sports event and at a music festival, called EXIT, where 62 confirmed cases were identified [<xref ref-type="bibr" rid="ref16">16</xref>]. In the same year, a further case was reported at a music festival in Hungary [<xref ref-type="bibr" rid="ref17">17</xref>]. The issue of mass gatherings, medicine, and global health security was the subject of a series of reports in <italic>The Lancet</italic> in 2012.</p>
                <p>In the next section, we provide a discussion of prior work on syndromic surveillance based on online social networks and search engine query logs.</p>
            </sec>
            <sec>
                <title>Related Work</title>
                <p>In 2001, Wagner et al [<xref ref-type="bibr" rid="ref11">11</xref>] first suggested the utility of query terms to detect infectious diseases. In particular, they presented data on the number of queries to a health website (WebMD) using words such as &#8220;cold&#8221; and &#8220;flu&#8221;. Though no quantitative assessment was provided, qualitatively a correlation is visible between the query frequency and measures of infectious disease. A related quantitative analysis was documented in subsequent work [<xref ref-type="bibr" rid="ref18">18</xref>], which took &#8220;the weekly counts of the number of accesses of selected influenza-related articles on the Healthlink website and measured their correlation with traditional influenza surveillance data from the Centers for Disease Control and Prevention (CDC)&#8221;. The results showed a clear correlation; however, interestingly, the Web log data was no more timely than that of the CDC, that is, the Web log data did not allow an influenza outbreak to be detected any sooner than with traditional surveillance methods.</p>
                <p>Later, Eysenbach [<xref ref-type="bibr" rid="ref19">19</xref>] used information from Google&#8217;s AdSense to indirectly estimate the number of queries for particular search terms that contained keywords related to influenza. Specifically, Eysenbach reported correlations between the &#8220;number of clicks on a keyword-triggered influenza link&#8221; and traditional measures such as (1) the number of lab tests, and (2) the number of positive lab test results (cases). Pearson correlation scores of between .85 and .91 are reported. Interestingly, the higher correlation score was obtained when correlating with the number of cases reported for the next week, indicating the Web-based information was more timely.</p>
                <p>A number of systems have been developed to gather and analyze unstructured information that is openly available on the Web. The earliest example of this is Global Public Health Intelligence Network (GPHIN) developed by the Canadian government and the WHO [<xref ref-type="bibr" rid="ref20">20</xref>]. A number of systems have subsequently been deployed, including BioCaster [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], EpiSPIDER [<xref ref-type="bibr" rid="ref23">23</xref>], and HealthMap [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Comparisons of these various systems can be found in [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>].</p>
                <p>Interest in Web-based surveillance increased significantly with the publication by Polgreen et al [<xref ref-type="bibr" rid="ref28">28</xref>] and Ginsberg et al [<xref ref-type="bibr" rid="ref29">29</xref>] of relationships between query search terms and influenza-like illness (ILI) based on Yahoo and Google search logs, respectively. Polgreen et al showed that it was possible to estimate the percentage of positive cultures for influenza and the deaths attributable to pneumonia and influenza in the United States, and to do so several weeks ahead of actual culture results. Ginsberg et al reported similar findings. A further contribution of Ginsberg et al [<xref ref-type="bibr" rid="ref29">29</xref>] was to automatically determine the best set of query search terms that correlate with CDC estimates. The work by Ginsberg et al has subsequently been developed as Google Flu Trends and its more generic service, Google Trends [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
                <p>A large body of research has since been developed that utilizes data from online social networks or query logs to infer health information. This includes work on mining blog posts that mention influenza. For example, Corley et al [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>] describe collecting blogs from a variety of sources and looking for the frequency of occurrence of keywords such as &#8220;influenza&#8221;. After normalization, they reported Pearson correlation scores of .77 and .55 for two datasets with corresponding ILI reports from the CDC (CDC ILINet reports). This work also discusses the possibility of identifying relevant online communities and developing associated targeted intervention strategies.</p>
                <p>The analysis of microblogging data from Twitter for health purposes has recently received attention [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. Inspired by the approach in Ginsberg et al [<xref ref-type="bibr" rid="ref29">29</xref>], Culotta et al [<xref ref-type="bibr" rid="ref35">35</xref>] applied a similar approach to Twitter data revealing the benefits of having longer, more complete messages as opposed to unstructured search query entries. This allows for simpler classification algorithms that can also filter out many of the erroneous messages that typically occur and would sometimes overwhelm the classifier predictions [<xref ref-type="bibr" rid="ref38">38</xref>]. Lampos and Cristianini [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>] performed an analysis of tracking influenza rates throughout the United Kingdom. Their major contribution to the existing regression-based models was proposing a new automatic way of selecting the keywords used by the classifier. These were learned from a large pool of candidates extracted from Web articles related to influenza, imposing a sparsity constraint via an L1 norm penalty in the least squares prediction error. This method yielded a correlation of 97% with respect to the reported influenza rates. Unfortunately, the proposed way of automatically building the vocabulary is based solely on correlation and sometimes produces terms that, although highly correlated with the flu trends, may not make good candidates to track for future predictions: for instance, automatically selected keywords &#8220;phone&#8221;, &#8220;nation&#8221;, or &#8220;mention&#8221; might not be good indicators of the presence of ILI conditions.</p>
            </sec>
        </sec>
        <sec sec-type="methods">
            <title>Methods</title>
            <sec>
                <title>Data</title>
                <p>We examined 10 events, nine of which were in the United Kingdom and one (the annual Hajj in Mecca) that had significant participation from people in the United Kingdom. All events took place in the second half of 2012.</p>
                <p>We extracted two datasets for each event, one from the entire set of Twitter users and the other from that of the Microsoft Bing search engine. The population of Twitter users relevant to an event was defined as any user who mentioned a hashtag associated with an event at least twice between 30 days before and 30 days after the event. We refer to the relevant users as the target population. We also identified a population of users who could be used as a reference population (see Analysis Algorithms below) for each event by randomly sampling 1% of users who did not mention the event in their Twitter messages, but had the United Kingdom listed as their location in their profile. It comprised 345,849 users over the entire study period. For each Twitter message, we extracted an anonymized user identifier, the date and time of the message, and its text.</p>
                <p>We followed a similar methodology for detecting relevant users according to queries made on the Bing search engine by users who agreed to share their queries, and marked as relevant any user who mentioned an event at least twice in their queries. For each query made by the relevant users, we extracted the query text, time and date, and an anonymized user identifier. In order to maintain user privacy, data were first anonymized by hashing, before the investigators had access to them. They were then aggregated prior to analysis and no individual-level user datum was examined by the experimenters.</p>
                <p>On average, we identified approximately 14,000 Twitter users and 5650 Bing users. The list of events and basic statistics concerning the events are shown in <xref ref-type="table" rid="table1">Table 1</xref>, including the number of Twitter users who mentioned the event at least twice, the number of tweets that mentioned each event, the number of users who queried for each event, and the number of queries.</p>
                <p>We extracted all queries and Twitter messages for the relevant users from 30 days before an event until 30 days after it. The queries and messages were stemmed using a Porter stemmer [<xref ref-type="bibr" rid="ref41">41</xref>]. We then marked each query and Twitter message as to whether it contained one or more words or phrases describing medical symptoms given in a list of 195 medical symptoms and 457 corresponding synonyms described in Yom-Tov and Gabrilovich [<xref ref-type="bibr" rid="ref42">42</xref>]. This list of terms was derived from a set of terms in <italic>International Statistical Classification of Diseases and Related Health Problems, 10th Revision (ICD-10),</italic> expanded to include ways in which non-specialist people frequently refer to the medical terms. The expansion is based on terms that people use in order to reach the Wikipedia page referring to a medical symptom and the terms frequently associated with it in Web documents. A complete explanation of how the list was constructed can be found in Yom-Tov and Gabrilovich [<xref ref-type="bibr" rid="ref42">42</xref>].</p>
                <p>A table listing the number of tweets that contained each of the symptom words or their synonyms in each of the festivals analyzed is provided in <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>.</p>
                <p />
                <table-wrap position="float" id="table1">
                    <label>Table 1</label>
                    <caption>
                        <p>List of analyzed events and statistics.</p>
                    </caption>
                    <table width="674" border="0" cellpadding="8" cellspacing="0" rules="groups" frame="hsides">
                        <col width="104" />
                        <col width="113" />
                        <col width="74" />
                        <col width="88" />
                        <col width="104" />
                        <col width="87" />
                        <col width="104" />
                        <thead>
                            <tr valign="bottom">
                                <td>Event</td>
                                <td>Dates</td>
                                <td>Capacity<sup>a</sup>
                                </td>
                                <td colspan="2">Twitter</td>
                                <td colspan="2">Bing</td>
                            </tr>
                            <tr valign="top">
                                <td>
                                    <break />
                                </td>
                                <td>
                                    <break />
                                </td>
                                <td>
                                    <break />
                                </td>
                                <td>Number of users</td>
                                <td>Number of festival mentions</td>
                                <td>Number of users</td>
                                <td>Number of festival queries</td>
                            </tr>
                        </thead>
                        <tbody>
                            <tr valign="top">
                                <td>Wakestock</td>
                                <td>6-8 July</td>
                                <td>10,000</td>
                                <td>3878</td>
                                <td>12,180</td>
                                <td>1177</td>
                                <td>3750</td>
                            </tr>
                            <tr valign="top">
                                <td>Wireless Festival</td>
                                <td>6-8 July</td>
                                <td>50,000</td>
                                <td>23,105</td>
                                <td>191,762</td>
                                <td>2309</td>
                                <td>6909</td>
                            </tr>
                            <tr valign="top">
                                <td>T in the Park</td>
                                <td>6-8 July</td>
                                <td>85,000</td>
                                <td>24,746</td>
                                <td>175,881</td>
                                <td>11,899</td>
                                <td>44,416</td>
                            </tr>
                            <tr valign="top">
                                <td>V Festival</td>
                                <td>17-19 August</td>
                                <td>90,000</td>
                                <td>22,018</td>
                                <td>92,722</td>
                                <td>14,704</td>
                                <td>50,796</td>
                            </tr>
                            <tr valign="top">
                                <td>Bestival</td>
                                <td>6-9 September</td>
                                <td>30,000</td>
                                <td>13,359</td>
                                <td>104,550</td>
                                <td>6715</td>
                                <td>23,330</td>
                            </tr>
                            <tr valign="top">
                                <td>Creamfields</td>
                                <td>24-26 August</td>
                                <td>80,000</td>
                                <td>21,703</td>
                                <td>191,663</td>
                                <td>5533</td>
                                <td>19,071</td>
                            </tr>
                            <tr valign="top">
                                <td>Hajj</td>
                                <td>24-27 October</td>
                                <td>3,161,573</td>
                                <td>17,473</td>
                                <td>129,137</td>
                                <td>3402</td>
                                <td>13,892</td>
                            </tr>
                            <tr valign="top">
                                <td>Isle of Wight Festival</td>
                                <td>22-24 June</td>
                                <td>60,000</td>
                                <td>6276</td>
                                <td>1398</td>
                                <td>4400</td>
                                <td>14,222</td>
                            </tr>
                            <tr valign="top">
                                <td>Download Festival</td>
                                <td>8-10 June</td>
                                <td>120,000</td>
                                <td>9360</td>
                                <td>1497</td>
                                <td>4598</td>
                                <td>17,267</td>
                            </tr>
                            <tr valign="top">
                                <td>RockNess</td>
                                <td>8-10 June</td>
                                <td>35,000</td>
                                <td>12,935</td>
                                <td>1068</td>
                                <td>1764</td>
                                <td>6266</td>
                            </tr>
                            <tr valign="top">
                                <td>Median</td>
                                <td>
                                    <break />
                                </td>
                                <td>70,000</td>
                                <td>15,416</td>
                                <td>98,636</td>
                                <td>4499</td>
                                <td>15,744</td>
                            </tr>
                        </tbody>
                    </table>
                    <table-wrap-foot>
                        <fn id="table1fn1">
                            <p>
                                <sup>a</sup>Capacity information from Wikifestivals and Wikipedia websites.</p>
                        </fn>
                    </table-wrap-foot>
                </table-wrap>
                <p />
            </sec>
            <sec>
                <title>Analysis Algorithms</title>
                <sec>
                    <title>Overview</title>
                    <p>We analyzed each dataset using three methods, described below. Briefly, Method 1 tests how well the probability of a word occurring as a function of time fits a lognormal distribution with variance between 1.2 and 1.5, since this is the epidemiological distribution predicted in [<xref ref-type="bibr" rid="ref43">43</xref>] for spread of infectious disease. Method 2 compares the number of times a symptom was mentioned before and after the date of an event, and uses a statistical test based on the False Discovery Rate (FDR) to determine significance. Method 3 computes the likelihood that symptoms would be measured at an observed frequency in a target population compared to what would be expected by chance. All three methods are described in detail below.</p>
                </sec>
                <sec>
                    <title>Method 1: Comparison to Background With Epidemiological Profile</title>
                    <p>Let <italic>P</italic>
                        <sup>
                            <italic>T</italic>
                        </sup>
                        <sub>
                            <italic>i</italic>
                        </sub>
                        <italic>(w,t)</italic> be the probability that the <italic>i</italic>-th word will appear in the target population on day <italic>t</italic>, where, in our data <italic>t</italic>&#8712;[&#8722;30,&#8722;29,...,29,30]. Similarly, we denote <italic>P</italic>
                        <sup>
                            <italic>R</italic>
                        </sup>
                        <sub>
                            <italic>i</italic>
                        </sub>
                        <italic>(w,t)</italic> as the same probability in the reference population, that is, in a population that is disjoint from the target population but is located in a similar geographic area.</p>
                    <p>We assume that if there is an epidemic of an infectious disease in the population, users mention its symptoms in their text (eg, Twitter messages). In that case, the probability <italic>P</italic>
                        <sup>
                            <italic>T</italic>
                        </sup>
                        <sub>
                            <italic>i</italic>
                        </sub>
                        <italic>(w,t)</italic> of a word describing a symptom of the disease should follow the appearance profile of such a disease, which takes into account its incubation period. This profile should fit a lognormal distribution with a variance between 1.2 and 1.5 [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
                    <p>Thus, for each of the symptom words, we compute its probability over time and normalize this by the same probability for the reference population, in order to exclude diseases that are unrelated to the event. Therefore, for each symptom word (and its synonyms), we compute a score given by <italic>P</italic>
                        <sup>
                            <italic>T</italic>
                        </sup>
                        <sub>
                            <italic>i</italic>
                        </sub>
                        <italic>(w,t)/ P</italic>
                        <sup>
                            <italic>R</italic>
                        </sup>
                        <sub>
                            <italic>i</italic>
                        </sub>
                        <italic>(w,t)</italic>, and fit to it a lognormal distribution with a center that varies from the first day of the event until 14 days later. The day on which the best fit is found (in the least-squares sense) is chosen to represent the distribution of this word.</p>
                    <p>In order to ascertain if the fit of the distribution is statistically significant, we employ the FDR procedure [<xref ref-type="bibr" rid="ref44">44</xref>] and conduct the same procedure for a random set of 1950 non-symptom words (10 times larger than the symptom list) and display a symptom only if its fit to the lognormal distribution is greater than would be expected at an FDR of 1%.</p>
                    <p>This method should work well if there is a large enough target population to generate information pertaining to the epidemic and should enable not only the identification of the outbreak but also its temporal profile.</p>
                </sec>
                <sec>
                    <title>Method 2: Comparison to Background and Time</title>
                    <p>Here, we follow Yom-Tov and Gabrilovich [<xref ref-type="bibr" rid="ref42">42</xref>] and construct a 2&#215;2 contingency table that measures the number of times a symptom was mentioned before and after the date of the event (see <xref ref-type="table" rid="table2">Table 2</xref> for an example), for either the target or reference population. Each symptom is then scored according to the chi-square score computed from the table.</p>
                    <p>A threshold for statistical significance is computed using FDR [<xref ref-type="bibr" rid="ref44">44</xref>] with a random set of non-symptom words. We report symptoms with a chi-square score higher than that expected at an FDR of 1%.</p>
                    <table-wrap position="float" id="table2">
                        <label>Table 2</label>
                        <caption>
                            <p>The 2&#215;2 contingency table for computing the chi-square score of Method 2.</p>
                        </caption>
                        <table width="674" border="0" cellpadding="7" cellspacing="0" rules="groups" frame="hsides">
                            <col width="203" />
                            <col width="168" />
                            <col width="148" />
                            <thead>
                                <tr valign="top">
                                    <td rowspan="2">Number of times that the user mentioned/queried for the symptom or its synonym</td>
                                    <td colspan="2">User queried for or tweeted about the festival?</td>
                                </tr>
                                <tr valign="top">
                                    <td>No</td>
                                    <td>Yes</td>
                                </tr>
                            </thead>
                            <tbody>
                                <tr valign="top">
                                    <td>Before Day 0</td>
                                    <td>N<sub>11</sub>
                                    </td>
                                    <td>N<sub>12</sub>
                                    </td>
                                </tr>
                                <tr valign="top">
                                    <td>After Day 0</td>
                                    <td>N<sub>21</sub>
                                    </td>
                                    <td>N<sub>22</sub>
                                    </td>
                                </tr>
                            </tbody>
                        </table>
                    </table-wrap>
                    <p />
                </sec>
                <sec>
                    <title>Method 3: What&#8217;s Strange About Recent Events</title>
                    <p>Following the approach in [<xref ref-type="bibr" rid="ref45">45</xref>] (What&#8217;s Strange About Recent Events [WSARE]), for each day after the mass gathering, <italic>t</italic>&#8712;[1,&#8943;,30], we compute a one-term rule score for each symptom in our vocabulary. The score is computed using a hypothesis test in which the null hypothesis is the independence between history records and current day counts. We apply Fisher&#8217;s exact test on a 2&#215;2 contingency table, as shown in <xref ref-type="table" rid="table3">Table 3</xref>, made out of the current day&#8217;s symptom count and the number of times the symptom was mentioned in the time prior to the festivals.</p>
                    <p>The test generates a <italic>P</italic> value, given by <italic>P(x=k)=C(K, k)C(N-K, n-k)/C(N, n)</italic>, with <italic>C(n, k)</italic> being the binomial coefficient (&#8220;n choose k&#8221;) &#8211; <italic>C(n,k)=n!/(k!(n-k)!)</italic>, and where <italic>k</italic> is the number of tweets containing the keyword <italic>w</italic>
                        <sub>
                            <italic>i</italic>
                        </sub> today, <italic>K</italic> is the number of times the keyword <italic>w</italic>
                        <sub>
                            <italic>i</italic>
                        </sub> was mentioned in the period before the festival, <italic>n</italic> is the number of tweets today, and <italic>N</italic> is the number of tweets in the period before the festival.</p>
                    <p>Since we are computing a score for each day, we consider as baseline the corresponding weekdays in the 30-day time window (ie, if the current day is Tuesday, we will look back to all Tuesdays in the time before the mass gathering and take that as our history baseline). This is done primarily to eliminate false detection due to periodic weekly trends in Twitter postings.</p>
                    <p />
                    <table-wrap position="float" id="table3">
                        <label>Table 3</label>
                        <caption>
                            <p>The 2&#215;2 contingency table (rule <italic>w</italic>
                                <sub>
                                    <italic>i</italic>
                                </sub>=1: tweet contains keyword <italic>w</italic>
                                <sub>
                                    <italic>i</italic>
                                </sub>) for Fisher&#8217;s exact test.</p>
                        </caption>
                        <table width="674" border="0" cellpadding="8" cellspacing="0" rules="groups" frame="hsides">
                            <col width="76" />
                            <col width="295" />
                            <col width="302" />
                            <thead>
                                <tr valign="top">
                                    <td>
                                        <break />
                                    </td>
                                    <td>
                                        <italic>C</italic>
                                        <sub>
                                            <italic>today</italic>
                                        </sub>
                                    </td>
                                    <td>
                                        <italic>C</italic>
                                        <sub>
                                            <italic>history</italic>
                                        </sub>
                                    </td>
                                </tr>
                            </thead>
                            <tbody>
                                <tr valign="bottom">
                                    <td>
                                        <italic>w</italic>
                                        <sub>
                                            <italic>i</italic>
                                        </sub>=1</td>
                                    <td># today tweets containing <italic>w</italic>
                                        <sub>
                                            <italic>i</italic>
                                        </sub>
                                        <italic>(k)</italic><sup>a</sup>
                                    </td>
                                    <td># history tweets containing <italic>w</italic>
                                        <sub>
                                            <italic>i</italic>
                                        </sub>
                                        <italic>(K)</italic>
                                        <sup>b</sup>
                                    </td>
                                </tr>
                                <tr valign="top">
                                    <td>
                                        <italic>w</italic>
                                        <sub>
                                            <italic>i</italic>
                                        </sub>=0</td>
                                    <td># today tweets not mentioning <italic>w</italic>
                                        <sub>
                                            <italic>i</italic>
                                        </sub>
                                        <italic>(n-k)</italic>
                                    </td>
                                    <td># history tweets not mentioning <italic>w</italic>
                                        <sub>
                                            <italic>i</italic>
                                        </sub>
                                        <italic>(N-K)</italic>
                                    </td>
                                </tr>
                                <tr valign="bottom">
                                    <td>
                                        <break />
                                    </td>
                                    <td>
                                        <italic>n</italic>
                                        <sup>c</sup>
                                    </td>
                                    <td>
                                        <italic>N</italic>
                                        <sup>d</sup>
                                    </td>
                                </tr>
                            </tbody>
                        </table>
                        <table-wrap-foot>
                            <fn id="table3fn1">
                                <p>
                                    <sup>a</sup>
                                    <italic>k:</italic> the number of tweets containing the keyword <italic>w</italic>
                                    <sub>
                                        <italic>i</italic>
                                    </sub> today.</p>
                            </fn>
                            <fn id="table3fn2">
                                <p>
                                    <sup>b</sup>
                                    <italic>K:</italic> the number of times the keyword <italic>w</italic>
                                    <sub>
                                        <italic>i</italic>
                                    </sub> was mentioned in the period before the festival.</p>
                            </fn>
                            <fn id="table3fn3">
                                <p>
                                    <sup>c</sup>
                                    <italic>n:</italic> the number of tweets today.</p>
                            </fn>
                            <fn id="table3fn4">
                                <p>
                                    <sup>d</sup>
                                    <italic>N:</italic> the number of tweets in the period before the festival.</p>
                            </fn>
                        </table-wrap-foot>
                    </table-wrap>
                    <p />
                </sec>
            </sec>
        </sec>
        <sec sec-type="results">
            <title>Results</title>
            <p>As noted above, the target population was defined as any user who tweeted a hashtag related to the event during the data period. To validate this heuristic, a random sample of 200 Twitter users who mentioned the Wakestock festival in their tweets was analyzed. Their tweets were labeled according to whether or not they implied that the user was at the event. The area under the receiver operating characteristic (ROC) curve for this label as a function of the number of tweets a user made that had the event hashtag was 0.91 and the true detection rate at the threshold of two tweets was 0.70. Therefore, the majority of people who were detected by our heuristic did, in fact, attend the festival. The remaining users either did not attend the event, and thus added noise to our analysis, or did not mention their attendance in their tweets.</p>
            <p>
                <xref ref-type="table" rid="table4">Table 4</xref> shows the list of statistically significant symptoms (at <italic>P</italic>&#60;.01) identified in the Twitter data for each of the 10 events. Several observations are in order. First, though most identified symptoms are mild (eg, tired), in some events, the symptoms could be a cause for concern. For example, in the Bestival event, the symptom was &#8220;tremor&#8221;.</p>
            <p>In only two of the events (Wakestock and V Festival) did all three methods identify the same symptoms. Anecdotally, once &#8220;cough&#8221; was identified as a possible symptom after the Wakestock festival, we found tweets such as &#8220;anyone else still suffering from the wakestock cough? can&#8217;t be only me&#8221;, which were made by people who were identified as having been to the festival, suggesting that this is a true symptom that was also self-identified as due to the event. This, together with the fact that it was identified by all three analysis methods, indicates that this symptom is very unlikely to be a spurious false positive, especially as it was identified by making different comparisons within the data (eg, target vs control population and before vs after the event in the target population). Thus, the use of more than one analysis method strengthens the analysis and reduces the likelihood of false positives.</p>
            <p>We tested the agreement between all pairs of analysis methods for each of the events using a chi-square test at a threshold of <italic>P</italic>=.01. Methods 2 and 3 had a statistically significant agreement in six of the 10 events, Methods 1 and 3 in two of eight events (two of the events had no identified symptoms), and Methods 1 and 2 in three of eight of the events. We also found a statistically significant agreement between sources for three of the events: Wakestock, V Festival, and T in the Park. The agreement rate expected by chance, as computed using an FDR procedure, is 5 of 1000 comparisons. Therefore, these agreements are much higher than expected by chance and lend support to the hypothesis that the different methods identified real signals, through alternative means.</p>
            <p>
                <xref ref-type="table" rid="table5">Table 5</xref> shows the list of statistically significant symptoms (at <italic>P</italic>&#60;.01) identified in the Bing data for each of the 10 events using Method 2. We applied only this method because there was insufficient daily activity in the Bing data to allow the application of Methods 1 and 3. As this table shows, the symptoms identified in the Bing data were potentially more serious (eg, &#8220;diarrhea&#8221; and &#8220;vomiting&#8221;) and also more personally sensitive. This is probably because users tend to share more sensitive information in anonymous media [<xref ref-type="bibr" rid="ref46">46</xref>]. Thus, the use of Bing data complements Twitter data in the kinds of symptoms that are identified. However, the relative sparseness of this data, which is at least partly related to the number of Bing users in the United Kingdom, also means that not all methods are applicable to it.</p>
            <p>In order to validate whether our methods might result in false positive symptoms, we also applied our methods to an event with a small physical footprint, but one that had significant media attention. Specifically, we chose the opening of The Shard building in London (the tallest building in the European Union) on July 5, 2012. This event was mentioned by 2007 users in 5553 tweets. No symptoms were reported at statistically significant levels by any of these methods. This provides evidence that when no symptoms exist, our methods will not report spurious symptoms.</p>
            <p />
            <table-wrap position="float" id="table4">
                <label>Table 4</label>
                <caption>
                    <p>Statistically significant symptoms<sup>a</sup> from Twitter data for each event and three analysis methods.</p>
                </caption>
                <table width="674" border="0" cellpadding="8" cellspacing="0" rules="groups" frame="hsides">
                    <col width="202" />
                    <col width="104" />
                    <col width="210" />
                    <col width="158" />
                    <thead>
                        <tr valign="top">
                            <td>Event</td>
                            <td>Method 1</td>
                            <td>Method 2</td>
                            <td>Method 3</td>
                        </tr>
                    </thead>
                    <tbody>
                        <tr valign="top">
                            <td>Wakestock</td>
                            <td>Cough</td>
                            <td>Cough</td>
                            <td>Tired, cough</td>
                        </tr>
                        <tr valign="top">
                            <td>Wireless Festival</td>
                            <td>None</td>
                            <td>Tired, pain, tremor</td>
                            <td>Tired, flatulence</td>
                        </tr>
                        <tr valign="top">
                            <td>T in the Park</td>
                            <td>Tired</td>
                            <td>Tired, pain, cough</td>
                            <td>Tired, cough</td>
                        </tr>
                        <tr valign="top">
                            <td>V Festival</td>
                            <td>Depression</td>
                            <td>Tired, pain, depression</td>
                            <td>Depression</td>
                        </tr>
                        <tr valign="top">
                            <td>Bestival</td>
                            <td>None</td>
                            <td>Tired, pain, tremor</td>
                            <td>Tired, fever</td>
                        </tr>
                        <tr valign="top">
                            <td>Creamfields</td>
                            <td>None</td>
                            <td>Tired, pain, blindness</td>
                            <td>None</td>
                        </tr>
                        <tr valign="top">
                            <td>Hajj</td>
                            <td>Rash, wound</td>
                            <td>Tired</td>
                            <td>Tired</td>
                        </tr>
                        <tr valign="top">
                            <td>Isle of Wight Festival</td>
                            <td>None</td>
                            <td>Bleeding</td>
                            <td>None</td>
                        </tr>
                        <tr valign="top">
                            <td>Download Festival</td>
                            <td>None</td>
                            <td>None</td>
                            <td>None</td>
                        </tr>
                        <tr valign="top">
                            <td>RockNess</td>
                            <td>None</td>
                            <td>Phobia, swelling</td>
                            <td>None</td>
                        </tr>
                    </tbody>
                </table>
                <table-wrap-foot>
                    <fn id="table4fn1">
                        <p>
                            <sup>a</sup>When more than three symptoms were significant, only the top three are shown.</p>
                    </fn>
                </table-wrap-foot>
            </table-wrap>
            <p />
            <p />
            <table-wrap position="float" id="table5">
                <label>Table 5</label>
                <caption>
                    <p>Statistically significant symptoms<sup>a</sup> from Bing data for each event using Method 2.</p>
                </caption>
                <table width="674" border="0" cellpadding="8" cellspacing="0" rules="groups" frame="hsides">
                    <col width="303" />
                    <col width="371" />
                    <thead>
                        <tr valign="top">
                            <td>Event</td>
                            <td>Method 2</td>
                        </tr>
                    </thead>
                    <tbody>
                        <tr valign="top">
                            <td>Wakestock</td>
                            <td>Pain</td>
                        </tr>
                        <tr valign="top">
                            <td>Wireless Festival</td>
                            <td>Pain</td>
                        </tr>
                        <tr valign="top">
                            <td>T in the Park</td>
                            <td>Wound, cough, diarrhea</td>
                        </tr>
                        <tr valign="top">
                            <td>V Festival</td>
                            <td>Perspiration, edema, wound</td>
                        </tr>
                        <tr valign="top">
                            <td>Bestival</td>
                            <td>Vomiting, diarrhea</td>
                        </tr>
                        <tr valign="top">
                            <td>Creamfields</td>
                            <td>Wound, rash, itch</td>
                        </tr>
                        <tr valign="top">
                            <td>Hajj</td>
                            <td>Fever, flatulence, pain</td>
                        </tr>
                        <tr valign="top">
                            <td>Isle of Wight Festival</td>
                            <td>Headache, fever, flatulence</td>
                        </tr>
                        <tr valign="top">
                            <td>Download Festival</td>
                            <td>Diarrhea, wound, headache</td>
                        </tr>
                        <tr valign="top">
                            <td>RockNess</td>
                            <td>Fever</td>
                        </tr>
                    </tbody>
                </table>
                <table-wrap-foot>
                    <fn id="table5fn1">
                        <p>
                            <sup>a</sup>When more than three symptoms were significant, only the top three are shown.</p>
                    </fn>
                </table-wrap-foot>
            </table-wrap>
            <p />
        </sec>
        <sec sec-type="discussion">
            <title>Discussion</title>
            <sec>
                <title>Principal Findings</title>
                <p>Mass gatherings are potentially significant to the spread of infectious diseases. However, traditional surveillance methods are challenged by the fact that participants may congregate and disperse very quickly. In this paper, we investigated whether syndromic surveillance based on Twitter and query logs could be used to monitor mass gatherings.</p>
                <p>We looked at nine music festivals that took place in the United Kingdom in 2012 as well as the 2012 Hajj religious gathering in Mecca. When analyzing the Twitter data, we considered three different statistical methods. The three methods did not always give the same results, with Methods 1 and 3 finding no statistically significant symptoms almost half of the time. However, when all three methods did identify statistically significant symptoms at the same concert, there was almost always agreement with at least one of the symptoms.</p>
                <p>Each of the three methods compares different attributes of the data in order to detect medical symptoms. Because of this, each method might be better in the analysis of data from some festivals, while for others it will perform less accurately. By using more than one method, we gain two benefits. First, if more than one method discovers that a symptom has appeared with an unexpectedly high probability (as noted above), this strengthens the evidence that this symptom has indeed appeared in festival participants. Second, at the cost of higher false positive rates (but also higher true positives), health authorities might choose to use symptoms discovered by any of the methods as possible candidates for further investigation.</p>
                <p>The relative lack of data provided by the Bing query logs permitted only Method 2 to be used. Generally, the statistically significant symptoms that were identified were different from the symptoms identified by Twitter. We hypothesized that this is because users rightly perceive that tweets are public, while queries are private. Consequently, the symptoms identified by the query log describe more private indicators such as &#8220;flatulence&#8221; and &#8220;diarrhea&#8221;. Nevertheless, for two concerts, namely &#8220;Wirelessfest&#8221; and &#8220;T in the Park&#8221;, using Method 2 for both Tweets and query logs, the same symptoms were identified as &#8220;pain&#8221; and &#8220;cough&#8221; respectively.</p>
            </sec>
            <sec>
                <title>Limitations and Conclusions</title>
                <p>To the best of our knowledge, no infectious outbreaks at mass gatherings were reported to health authorities during the last 18 months, the period for which query logs are available. While this is, of course, fortunate, it prevents any comparison with ground truth data. Future work is needed to compare results from Internet data with results obtained from traditional methods. Note, however, that the use of traditional surveillance methods can be challenging in the context of mass gatherings due to the combination of an incubation period prior to onset of symptoms and dispersal of participants to their home regions.</p>
                <p>An additional drawback of our method is that some of the identified symptoms (eg, tired) might not be symptoms of a disease, but instead outcomes of going to specific types of events. Therefore, an additional filtering stage might be required so as to remove symptoms that regularly appear in similar events.</p>
                <p />
            </sec>
        </sec>
    </body>
    <back>
        <app-group>
            <app id="app1">
                <title>Multimedia Appendix 1</title>
                <p>Number of tweets containing symptom words for each festival.</p>
                <media xlink:href="jmir_v16i6e154_app1.pdf" xlink:title="PDF File (Adobe PDF File), 160KB" />
            </app>
        </app-group>
        <glossary>
            <title>Abbreviations</title>
            <def-list>
                <def-item>
                    <term id="abb1">CDC</term>
                    <def>
                        <p>Centers for Disease Control and Prevention</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb2">EMS</term>
                    <def>
                        <p>Emergency Medical Services</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb3">FDR</term>
                    <def>
                        <p>false discovery rate</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb4">ILI</term>
                    <def>
                        <p>influenza-like illness</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb5">OTC</term>
                    <def>
                        <p>over the counter</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb6">WHO</term>
                    <def>
                        <p>World Health Organization</p>
                    </def>
                </def-item>
            </def-list>
        </glossary>
        <ack>
            <p>The work described here was supported by an Engineering and Physical Sciences Research Council Interdisciplinary Research Collaboration (EPSRC IRC) in Early Warning Sensing Systems for Infectious Diseases EP/K031953/1, the School of the Built Environment, Engineering and Mathematical and Physical Sciences (BEAMS), University College London, and a Royal Society Wolfson Merit Award for RM BEAMS. We thank these institutions for their support. The authors would also like to thank Dame Anne Johnson of University College London for suggesting the problem and for many useful discussions.</p>
        </ack>
        <fn-group>
            <fn fn-type="conflict">
                <p>None declared.</p>
            </fn>
        </fn-group>
        <ref-list>
            <ref id="ref1">
                <label>1</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Perry</surname>
                            <given-names>RD</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Fetherston</surname>
                            <given-names>JD</given-names>
                        </name>
                    </person-group>
                    <article-title>Yersinia pestis--etiologic agent of plague</article-title>
                    <source>Clin Microbiol Rev</source>
                    <year>1997</year>
                    <month>01</month>
                    <volume>10</volume>
                    <issue>1</issue>
                    <fpage>35</fpage>
                    <lpage>66</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://cmr.asm.org/cgi/pmidlookup?view=long&#38;pmid=8993858" />
                    </comment>
                    <pub-id pub-id-type="medline">8993858</pub-id>
                    <pub-id pub-id-type="pmcid">PMC172914</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Johnson</surname>
                            <given-names>NP</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Mueller</surname>
                            <given-names>J</given-names>
                        </name>
                    </person-group>
                    <article-title>Updating the accounts: global mortality of the 1918-1920 &#34;Spanish&#34; influenza pandemic</article-title>
                    <source>Bull Hist Med</source>
                    <year>2002</year>
                    <volume>76</volume>
                    <issue>1</issue>
                    <fpage>105</fpage>
                    <lpage>15</lpage>
                    <pub-id pub-id-type="medline">11875246</pub-id>
                    <pub-id pub-id-type="pii">S1086317602101050</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <nlm-citation citation-type="web">
                    <source>Cabinet Office National Risk Register of Civil Emergencies 2012 Edition</source>
                    <access-date>2014-06-06</access-date>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.gov.uk/government/publications/national-risk-register-for-civil-emergencies-2012-update">https://www.gov.uk/government/publications/national-risk-register-for-civil-emergencies-2012-update</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6Q79v6EPN</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <nlm-citation citation-type="web">
                    <source>Global Report: UNAIDS report on the global AIDS epidemic 2013</source>
                    <access-date>2014-06-06</access-date>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.unaids.org/en/resources/campaigns/globalreport2013/index.html">http://www.unaids.org/en/resources/campaigns/globalreport2013/index.html</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6Q7A04zh2</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Grundmann</surname>
                            <given-names>H</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Aires-de-Sousa</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Boyce</surname>
                            <given-names>J</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Tiemersma</surname>
                            <given-names>E</given-names>
                        </name>
                    </person-group>
                    <article-title>Emergence and resurgence of meticillin-resistant Staphylococcus aureus as a public-health threat</article-title>
                    <source>Lancet</source>
                    <year>2006</year>
                    <month>09</month>
                    <day>2</day>
                    <volume>368</volume>
                    <issue>9538</issue>
                    <fpage>874</fpage>
                    <lpage>85</lpage>
                    <pub-id pub-id-type="doi">10.1016/S0140-6736(06)68853-3</pub-id>
                    <pub-id pub-id-type="medline">16950365</pub-id>
                    <pub-id pub-id-type="pii">S0140-6736(06)68853-3</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Parkin</surname>
                            <given-names>DM</given-names>
                        </name>
                    </person-group>
                    <article-title>The global health burden of infection-associated cancers in the year 2002</article-title>
                    <source>Int J Cancer</source>
                    <year>2006</year>
                    <month>06</month>
                    <day>15</day>
                    <volume>118</volume>
                    <issue>12</issue>
                    <fpage>3030</fpage>
                    <lpage>44</lpage>
                    <pub-id pub-id-type="doi">10.1002/ijc.21731</pub-id>
                    <pub-id pub-id-type="medline">16404738</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <nlm-citation citation-type="web">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Davies</surname>
                            <given-names>SC</given-names>
                        </name>
                    </person-group>
                    <source>Infections and the rise of antimicrobial resistance</source>
                    <year>2011</year>
                    <access-date>2014-06-05</access-date>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://antibiotic-action.com/resources/chief-medical-officer-annual-report-volume-two-201-infections-and-the-rise-of-antimicrobial-resistance/">http://antibiotic-action.com/resources/chief-medical-officer-annual-report-volume-two-201-infections-and-the-rise-of-antimicrobial-resistance/</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6Q7ADsur0</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>German</surname>
                            <given-names>RR</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Lee</surname>
                            <given-names>LM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Horan</surname>
                            <given-names>JM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Milstein</surname>
                            <given-names>RL</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Pertowski</surname>
                            <given-names>CA</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Waller</surname>
                            <given-names>MN</given-names>
                        </name>
                        <collab>Guidelines Working Group Centers for Disease Control and Prevention (CDC)</collab>
                    </person-group>
                    <article-title>Updated guidelines for evaluating public health surveillance systems: recommendations from the Guidelines Working Group</article-title>
                    <source>MMWR Recomm Rep</source>
                    <year>2001</year>
                    <month>07</month>
                    <day>27</day>
                    <volume>50</volume>
                    <issue>RR-13</issue>
                    <fpage>1</fpage>
                    <lpage>35; quiz CE1</lpage>
                    <pub-id pub-id-type="medline">18634202</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>May</surname>
                            <given-names>L</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Chretien</surname>
                            <given-names>JP</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Pavlin</surname>
                            <given-names>JA</given-names>
                        </name>
                    </person-group>
                    <article-title>Beyond traditional surveillance: applying syndromic surveillance to developing settings--opportunities and challenges</article-title>
                    <source>BMC Public Health</source>
                    <year>2009</year>
                    <volume>9</volume>
                    <fpage>242</fpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.biomedcentral.com/1471-2458/9/242" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1186/1471-2458-9-242</pub-id>
                    <pub-id pub-id-type="medline">19607669</pub-id>
                    <pub-id pub-id-type="pii">1471-2458-9-242</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2718884</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Welliver</surname>
                            <given-names>RC</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Cherry</surname>
                            <given-names>JD</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Boyer</surname>
                            <given-names>KM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Deseda-Tous</surname>
                            <given-names>JE</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Krause</surname>
                            <given-names>PJ</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Dudley</surname>
                            <given-names>JP</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Murray</surname>
                            <given-names>RA</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Wingert</surname>
                            <given-names>W</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Champion</surname>
                            <given-names>JG</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Freeman</surname>
                            <given-names>G</given-names>
                        </name>
                    </person-group>
                    <article-title>Sales of nonprescription cold remedies: a unique method of influenza surveillance</article-title>
                    <source>Pediatr Res</source>
                    <year>1979</year>
                    <month>09</month>
                    <volume>13</volume>
                    <issue>9</issue>
                    <fpage>1015</fpage>
                    <lpage>7</lpage>
                    <pub-id pub-id-type="medline">503653</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Wagner</surname>
                            <given-names>MM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Tsui</surname>
                            <given-names>FC</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Espino</surname>
                            <given-names>JU</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Dato</surname>
                            <given-names>VM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Sittig</surname>
                            <given-names>DF</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Caruana</surname>
                            <given-names>RA</given-names>
                        </name>
                        <name name-style="western">
                            <surname>McGinnis</surname>
                            <given-names>LF</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Deerfield</surname>
                            <given-names>DW</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Druzdzel</surname>
                            <given-names>MJ</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Fridsma</surname>
                            <given-names>DB</given-names>
                        </name>
                    </person-group>
                    <article-title>The emerging science of very early detection of disease outbreaks</article-title>
                    <source>J Public Health Manag Pract</source>
                    <year>2001</year>
                    <month>11</month>
                    <volume>7</volume>
                    <issue>6</issue>
                    <fpage>51</fpage>
                    <lpage>9</lpage>
                    <pub-id pub-id-type="medline">11710168</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <nlm-citation citation-type="web">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Barbeschi</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Healing</surname>
                            <given-names>T</given-names>
                        </name>
                    </person-group>
                    <source>World Health Organization</source>
                    <year>2008</year>
                    <access-date>2014-06-05</access-date>
                    <comment>Communicable disease alert and response for mass gatherings: Key considerations<ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.who.int/csr/Mass_gatherings2.pdf">http://www.who.int/csr/Mass_gatherings2.pdf</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6Q7AIIRht</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Donkin</surname>
                            <given-names>H</given-names>
                        </name>
                    </person-group>
                    <article-title>The Cholera and Hagar's Well at Mecca</article-title>
                    <source>The Lancet</source>
                    <year>1883</year>
                    <month>08</month>
                    <volume>122</volume>
                    <issue>3128</issue>
                    <fpage>256</fpage>
                    <lpage>257</lpage>
                    <pub-id pub-id-type="doi">10.1016/S0140-6736(02)35905-1</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Morgan</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Gunneberg</surname>
                            <given-names>C</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Gunnell</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Healing</surname>
                            <given-names>TD</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Lamerton</surname>
                            <given-names>S</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Soltanpoor</surname>
                            <given-names>N</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Lewis</surname>
                            <given-names>DA</given-names>
                        </name>
                        <name name-style="western">
                            <surname>White</surname>
                            <given-names>DG</given-names>
                        </name>
                    </person-group>
                    <article-title>An outbreak of Campylobacter infection associated with the consumption of unpasteurised milk at a large festival in England</article-title>
                    <source>Eur J Epidemiol</source>
                    <year>1994</year>
                    <month>10</month>
                    <volume>10</volume>
                    <issue>5</issue>
                    <fpage>581</fpage>
                    <lpage>5</lpage>
                    <pub-id pub-id-type="medline">7859858</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Guti&#233;rrez</surname>
                            <given-names>I</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Litzroth</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Hammadi</surname>
                            <given-names>S</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Van Oyen</surname>
                            <given-names>H</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Gerard</surname>
                            <given-names>C</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Robesyn</surname>
                            <given-names>E</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Bots</surname>
                            <given-names>J</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Faidherbe</surname>
                            <given-names>MT</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Wuillaume</surname>
                            <given-names>F</given-names>
                        </name>
                    </person-group>
                    <article-title>Community transmission of influenza A (H1N1)v virus at a rock festival in Belgium, 2-5 July 2009</article-title>
                    <source>Euro Surveill</source>
                    <year>2009</year>
                    <month>08</month>
                    <day>6</day>
                    <volume>14</volume>
                    <issue>31</issue>
                    <fpage>1</fpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.eurosurveillance.org/ViewArticle.aspx?ArticleId=19294" />
                    </comment>
                    <pub-id pub-id-type="medline">19660245</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Loncarevic</surname>
                            <given-names>G</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Payne</surname>
                            <given-names>L</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kon</surname>
                            <given-names>P</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Petrovic</surname>
                            <given-names>V</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Dimitrijevic</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Knezevic</surname>
                            <given-names>T</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Medici</surname>
                            <given-names>S</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Milic</surname>
                            <given-names>N</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Nedelijkovic</surname>
                            <given-names>J</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Seke</surname>
                            <given-names>K</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Coulombier</surname>
                            <given-names>D</given-names>
                        </name>
                    </person-group>
                    <article-title>Public health preparedness for two mass gathering events in the context of pandemic influenza (H1N1) 2009 - Serbia, July 2009</article-title>
                    <source>Euro Surveill</source>
                    <year>2009</year>
                    <volume>14</volume>
                    <issue>31</issue>
                    <fpage>1</fpage>
                    <lpage>3</lpage>
                    <pub-id pub-id-type="medline">19660246</pub-id>
                    <pub-id pub-id-type="pii">19296</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Botelho-Nevers</surname>
                            <given-names>E</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Gautret</surname>
                            <given-names>P</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Benarous</surname>
                            <given-names>L</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Charrel</surname>
                            <given-names>R</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Felkai</surname>
                            <given-names>P</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Parola</surname>
                            <given-names>P</given-names>
                        </name>
                    </person-group>
                    <article-title>Travel-related influenza A/H1N1 infection at a rock festival in Hungary: one virus may hide another one</article-title>
                    <source>J Travel Med</source>
                    <year>2010</year>
                    <volume>17</volume>
                    <issue>3</issue>
                    <fpage>197</fpage>
                    <lpage>8</lpage>
                    <pub-id pub-id-type="doi">10.1111/j.1708-8305.2010.00410.x</pub-id>
                    <pub-id pub-id-type="medline">20536890</pub-id>
                    <pub-id pub-id-type="pii">JTM410</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Johnson</surname>
                            <given-names>HA</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Wagner</surname>
                            <given-names>MM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Hogan</surname>
                            <given-names>WR</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Chapman</surname>
                            <given-names>W</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Olszewski</surname>
                            <given-names>RT</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Dowling</surname>
                            <given-names>J</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Barnas</surname>
                            <given-names>G</given-names>
                        </name>
                    </person-group>
                    <article-title>Analysis of Web access logs for surveillance of influenza</article-title>
                    <source>Stud Health Technol Inform</source>
                    <year>2004</year>
                    <volume>107</volume>
                    <issue>Pt 2</issue>
                    <fpage>1202</fpage>
                    <lpage>6</lpage>
                    <pub-id pub-id-type="medline">15361003</pub-id>
                    <pub-id pub-id-type="pii">D040005485</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Eysenbach</surname>
                            <given-names>G</given-names>
                        </name>
                    </person-group>
                    <article-title>Infodemiology: Tracking flu-related searches on the Web for syndromic surveillance</article-title>
                    <source>AMIA Annual Symposium Proceedings</source>
                    <year>2006</year>
                    <fpage>244</fpage>
                    <lpage>248</lpage>
                    <pub-id pub-id-type="medline">17238340</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1839505</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Mykhalovskiy</surname>
                            <given-names>E</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Weir</surname>
                            <given-names>L</given-names>
                        </name>
                    </person-group>
                    <article-title>The Global Public Health Intelligence Network and early warning outbreak detection: a Canadian contribution to global public health</article-title>
                    <source>Can J Public Health</source>
                    <year>2006</year>
                    <volume>97</volume>
                    <issue>1</issue>
                    <fpage>42</fpage>
                    <lpage>4</lpage>
                    <pub-id pub-id-type="medline">16512327</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Collier</surname>
                            <given-names>N</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kawazoe</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Jin</surname>
                            <given-names>L</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Shigematsu</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Dien</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Barrero</surname>
                            <given-names>RA</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Takeuchi</surname>
                            <given-names>K</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kawtrakul</surname>
                            <given-names>A</given-names>
                        </name>
                    </person-group>
                    <article-title>A multilingual ontology for infectious disease surveillance: rationale, design and challenges</article-title>
                    <source>Lang Resources &#38; Evaluation</source>
                    <year>2007</year>
                    <month>6</month>
                    <day>26</day>
                    <volume>40</volume>
                    <issue>3-4</issue>
                    <fpage>405</fpage>
                    <lpage>413</lpage>
                    <pub-id pub-id-type="doi">10.1007/s10579-007-9019-7</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Collier</surname>
                            <given-names>N</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Doan</surname>
                            <given-names>S</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kawazoe</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Goodwin</surname>
                            <given-names>RM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Conway</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Tateno</surname>
                            <given-names>Y</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Ngo</surname>
                            <given-names>QH</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Dien</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kawtrakul</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Takeuchi</surname>
                            <given-names>K</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Shigematsu</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Taniguchi</surname>
                            <given-names>K</given-names>
                        </name>
                    </person-group>
                    <article-title>BioCaster: detecting public health rumors with a Web-based text mining system</article-title>
                    <source>Bioinformatics</source>
                    <year>2008</year>
                    <month>12</month>
                    <day>15</day>
                    <volume>24</volume>
                    <issue>24</issue>
                    <fpage>2940</fpage>
                    <lpage>1</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://bioinformatics.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=18922806" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btn534</pub-id>
                    <pub-id pub-id-type="medline">18922806</pub-id>
                    <pub-id pub-id-type="pii">btn534</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2639299</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Tolentino</surname>
                            <given-names>H</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kamadjeu</surname>
                            <given-names>R</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Fontelo</surname>
                            <given-names>P</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>F</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Matters</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Pollack</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Madoff</surname>
                            <given-names>L</given-names>
                        </name>
                    </person-group>
                    <article-title>Scanning the emerging infectious diseases horizon-visualizing ProMED emails using EpiSPIDER</article-title>
                    <source>Advances in Disease Surveillance</source>
                    <year>2007</year>
                    <volume>2</volume>
                    <fpage>169</fpage>
                </nlm-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Brownstein</surname>
                            <given-names>JS</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Freifeld</surname>
                            <given-names>CC</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Madoff</surname>
                            <given-names>LC</given-names>
                        </name>
                    </person-group>
                    <article-title>Digital disease detection--harnessing the Web for public health surveillance</article-title>
                    <source>N Engl J Med</source>
                    <year>2009</year>
                    <month>05</month>
                    <day>21</day>
                    <volume>360</volume>
                    <issue>21</issue>
                    <fpage>2153</fpage>
                    <lpage>2157</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19423867" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1056/NEJMp0900702</pub-id>
                    <pub-id pub-id-type="medline">19423867</pub-id>
                    <pub-id pub-id-type="pii">NEJMp0900702</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2917042</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Freifeld</surname>
                            <given-names>CC</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Mandl</surname>
                            <given-names>KD</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Reis</surname>
                            <given-names>BY</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Brownstein</surname>
                            <given-names>JS</given-names>
                        </name>
                    </person-group>
                    <article-title>HealthMap: global infectious disease monitoring through automated classification and visualization of Internet media reports</article-title>
                    <source>J Am Med Inform Assoc</source>
                    <year>2008</year>
                    <volume>15</volume>
                    <issue>2</issue>
                    <fpage>150</fpage>
                    <lpage>7</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.bmj.com/cgi/pmidlookup?view=long&#38;pmid=18096908" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1197/jamia.M2544</pub-id>
                    <pub-id pub-id-type="medline">18096908</pub-id>
                    <pub-id pub-id-type="pii">M2544</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2274789</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref26">
                <label>26</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Keller</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Blench</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Tolentino</surname>
                            <given-names>H</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Freifeld</surname>
                            <given-names>CC</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Mandl</surname>
                            <given-names>KD</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Mawudeku</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Eysenbach</surname>
                            <given-names>G</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Brownstein</surname>
                            <given-names>JS</given-names>
                        </name>
                    </person-group>
                    <article-title>Use of unstructured event-based reports for global infectious disease surveillance</article-title>
                    <source>Emerg Infect Dis</source>
                    <year>2009</year>
                    <month>05</month>
                    <volume>15</volume>
                    <issue>5</issue>
                    <fpage>689</fpage>
                    <lpage>95</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.doi.org/10.3201/eid1505.081114" />
                    </comment>
                    <pub-id pub-id-type="doi">10.3201/eid1505.081114</pub-id>
                    <pub-id pub-id-type="medline">19402953</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2687026</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref27">
                <label>27</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Lyon</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Nunn</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Grossel</surname>
                            <given-names>G</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Burgman</surname>
                            <given-names>M</given-names>
                        </name>
                    </person-group>
                    <article-title>Comparison of Web-based biosecurity intelligence systems: BioCaster, EpiSPIDER and HealthMap</article-title>
                    <source>Transbound Emerg Dis</source>
                    <year>2012</year>
                    <month>06</month>
                    <volume>59</volume>
                    <issue>3</issue>
                    <fpage>223</fpage>
                    <lpage>32</lpage>
                    <pub-id pub-id-type="doi">10.1111/j.1865-1682.2011.01258.x</pub-id>
                    <pub-id pub-id-type="medline">22182229</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref28">
                <label>28</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Polgreen</surname>
                            <given-names>PM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>Y</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Pennock</surname>
                            <given-names>DM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Nelson</surname>
                            <given-names>FD</given-names>
                        </name>
                    </person-group>
                    <article-title>Using Internet searches for influenza surveillance</article-title>
                    <source>Clin Infect Dis</source>
                    <year>2008</year>
                    <month>12</month>
                    <day>1</day>
                    <volume>47</volume>
                    <issue>11</issue>
                    <fpage>1443</fpage>
                    <lpage>8</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.cid.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=18954267" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1086/593098</pub-id>
                    <pub-id pub-id-type="medline">18954267</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref29">
                <label>29</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Ginsberg</surname>
                            <given-names>J</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Mohebbi</surname>
                            <given-names>MH</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Patel</surname>
                            <given-names>RS</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Brammer</surname>
                            <given-names>L</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Smolinski</surname>
                            <given-names>MS</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Brilliant</surname>
                            <given-names>L</given-names>
                        </name>
                    </person-group>
                    <article-title>Detecting influenza epidemics using search engine query data</article-title>
                    <source>Nature</source>
                    <year>2009</year>
                    <month>02</month>
                    <day>19</day>
                    <volume>457</volume>
                    <issue>7232</issue>
                    <fpage>1012</fpage>
                    <lpage>4</lpage>
                    <pub-id pub-id-type="doi">10.1038/nature07634</pub-id>
                    <pub-id pub-id-type="medline">19020500</pub-id>
                    <pub-id pub-id-type="pii">nature07634</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref30">
                <label>30</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Carneiro</surname>
                            <given-names>HA</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Mylonakis</surname>
                            <given-names>E</given-names>
                        </name>
                    </person-group>
                    <article-title>Google Trends: a Web-based tool for real-time surveillance of disease outbreaks</article-title>
                    <source>Clin Infect Dis</source>
                    <year>2009</year>
                    <month>11</month>
                    <day>15</day>
                    <volume>49</volume>
                    <issue>10</issue>
                    <fpage>1557</fpage>
                    <lpage>64</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.cid.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=19845471" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1086/630200</pub-id>
                    <pub-id pub-id-type="medline">19845471</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref31">
                <label>31</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Corley</surname>
                            <given-names>CD</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Cook</surname>
                            <given-names>DJ</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Mikler</surname>
                            <given-names>AR</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Singh</surname>
                            <given-names>KP</given-names>
                        </name>
                    </person-group>
                    <article-title>Text and structural data mining of influenza mentions in Web and social media</article-title>
                    <source>Int J Environ Res Public Health</source>
                    <year>2010</year>
                    <month>02</month>
                    <volume>7</volume>
                    <issue>2</issue>
                    <fpage>596</fpage>
                    <lpage>615</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.mdpi.com/1660-4601/7/2/596" />
                    </comment>
                    <pub-id pub-id-type="doi">10.3390/ijerph7020596</pub-id>
                    <pub-id pub-id-type="medline">20616993</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2872292</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref32">
                <label>32</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Corley</surname>
                            <given-names>CD</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Mikler</surname>
                            <given-names>AR</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Singh</surname>
                            <given-names>KP</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Cook</surname>
                            <given-names>DJ</given-names>
                        </name>
                    </person-group>
                    <article-title>Monitoring influenza trends through mining social media</article-title>
                    <year>2009</year>
                    <conf-name>International Conference on Bioinformatics and Computational Biology</conf-name>
                    <conf-date>July 2009</conf-date>
                    <conf-loc>Las Vegas, NV</conf-loc>
                    <fpage>340</fpage>
                    <lpage>346</lpage>
                </nlm-citation>
            </ref>
            <ref id="ref33">
                <label>33</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Lampos</surname>
                            <given-names>V</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Cristianini</surname>
                            <given-names>N</given-names>
                        </name>
                    </person-group>
                    <article-title>Tracking the flu pandemic by monitoring the social web</article-title>
                    <year>2010</year>
                    <conf-name>Cognitive Information Processing (CIP)</conf-name>
                    <conf-date>14-16 June 2010</conf-date>
                    <conf-loc>Elba, Italy</conf-loc>
                    <fpage>411</fpage>
                    <lpage>416</lpage>
                    <pub-id pub-id-type="doi">10.1109/CIP.2010.5604088</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref34">
                <label>34</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Lampos</surname>
                            <given-names>V</given-names>
                        </name>
                        <name name-style="western">
                            <surname>De Bie</surname>
                            <given-names>T</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Cristianini</surname>
                            <given-names>N</given-names>
                        </name>
                    </person-group>
                    <article-title>Flu detector - tracking epidemics on Twitter</article-title>
                    <year>2010</year>
                    <conf-name>Machine Learning and Knowledge Discovery in Databases</conf-name>
                    <conf-date>2010</conf-date>
                    <conf-loc>Barcelona, Spain</conf-loc>
                    <fpage>599</fpage>
                    <lpage>602</lpage>
                </nlm-citation>
            </ref>
            <ref id="ref35">
                <label>35</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Culotta</surname>
                            <given-names>A</given-names>
                        </name>
                    </person-group>
                    <article-title>Towards detecting influenza epidemics by analyzing Twitter messages</article-title>
                    <source>Proceedings of the First Workshop on Social Media Analytics</source>
                    <year>2010</year>
                    <conf-name>The 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
                    <conf-date>2010</conf-date>
                    <conf-loc>Washington, DC</conf-loc>
                    <fpage>115</fpage>
                    <lpage>122</lpage>
                    <pub-id pub-id-type="doi">10.1145/1964858.1964874</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref36">
                <label>36</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Chew</surname>
                            <given-names>C</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Eysenbach</surname>
                            <given-names>G</given-names>
                        </name>
                    </person-group>
                    <article-title>Pandemics in the age of Twitter: content analysis of Tweets during the 2009 H1N1 outbreak</article-title>
                    <source>PLoS One</source>
                    <year>2010</year>
                    <volume>5</volume>
                    <issue>11</issue>
                    <fpage>e14118</fpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0014118" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0014118</pub-id>
                    <pub-id pub-id-type="medline">21124761</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2993925</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref37">
                <label>37</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Signorini</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Segre</surname>
                            <given-names>AM</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Polgreen</surname>
                            <given-names>PM</given-names>
                        </name>
                    </person-group>
                    <article-title>The use of Twitter to track levels of disease activity and public concern in the U.S. during the influenza A H1N1 pandemic</article-title>
                    <source>PLoS One</source>
                    <year>2011</year>
                    <volume>6</volume>
                    <issue>5</issue>
                    <fpage>e19467</fpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0019467" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0019467</pub-id>
                    <pub-id pub-id-type="medline">21573238</pub-id>
                    <pub-id pub-id-type="pii">PONE-D-10-02464</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3087759</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref38">
                <label>38</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Culotta</surname>
                            <given-names>A</given-names>
                        </name>
                    </person-group>
                    <article-title>Lightweight methods to estimate influenza rates and alcohol sales volume from Twitter messages</article-title>
                    <source>Lang Resources &amp; Evaluation</source>
                    <year>2012</year>
                    <month>05</month>
                    <day>13</day>
                    <volume>47</volume>
                    <issue>1</issue>
                    <fpage>217</fpage>
                    <lpage>238</lpage>
                    <pub-id pub-id-type="doi">10.1007/s10579-012-9185-0</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref39">
                <label>39</label>
                <nlm-citation citation-type="book">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>de Quincey</surname>
                            <given-names>E</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kostkova</surname>
                            <given-names>P</given-names>
                        </name>
                    </person-group>
                    <article-title>Early warning and outbreak detection using social networking websites: The potential of Twitter</article-title>
                    <source>Electronic Healthcare</source>
                    <year>2010</year>
                    <publisher-loc>Berlin</publisher-loc>
                    <publisher-name>Springer</publisher-name>
                    <fpage>21</fpage>
                    <lpage>24</lpage>
                </nlm-citation>
            </ref>
            <ref id="ref40">
                <label>40</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Szomszor</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kostkova</surname>
                            <given-names>P</given-names>
                        </name>
                        <name name-style="western">
                            <surname>De Quincey</surname>
                            <given-names>E</given-names>
                        </name>
                    </person-group>
                    <article-title>#swineflu: Twitter predicts swine flu outbreak in 2009</article-title>
                    <year>2010</year>
                    <month>12</month>
                    <conf-name>eHealth</conf-name>
                    <conf-date>13-15 December 2010</conf-date>
                    <conf-loc>Casablanca, Morocco</conf-loc>
                    <publisher-name>Electronic Healthcare</publisher-name>
                    <pub-id pub-id-type="doi">10.1007/978-3-642-23635-8_3</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref41">
                <label>41</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Porter</surname>
                            <given-names>MF</given-names>
                        </name>
                    </person-group>
                    <article-title>An algorithm for suffix stripping</article-title>
                    <source>Program: electronic library and information systems</source>
                    <year>1980</year>
                    <volume>14</volume>
                    <issue>3</issue>
                    <fpage>130</fpage>
                    <lpage>137</lpage>
                    <pub-id pub-id-type="doi">10.1108/eb046814</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref42">
                <label>42</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Yom-Tov</surname>
                            <given-names>E</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Gabrilovich</surname>
                            <given-names>E</given-names>
                        </name>
                    </person-group>
                    <article-title>Postmarket drug surveillance without trial costs: discovery of adverse drug reactions through large-scale analysis of web search queries</article-title>
                    <source>J Med Internet Res</source>
                    <year>2013</year>
                    <volume>15</volume>
                    <issue>6</issue>
                    <fpage>e124</fpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.jmir.org/2013/6/e124/" />
                    </comment>
                    <pub-id pub-id-type="doi">10.2196/jmir.2614</pub-id>
                    <pub-id pub-id-type="medline">23778053</pub-id>
                    <pub-id pub-id-type="pii">v15i6e124</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3713931</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref43">
                <label>43</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Sartwell</surname>
                            <given-names>PE</given-names>
                        </name>
                    </person-group>
                    <article-title>The distribution of incubation periods of infectious disease</article-title>
                    <source>Am J Epidemiol</source>
                    <year>1950</year>
                    <month>05</month>
                    <volume>51</volume>
                    <issue>3</issue>
                    <fpage>310</fpage>
                    <lpage>318</lpage>
                    <pub-id pub-id-type="medline">15413610</pub-id>
                    <pub-id pub-id-type="pmcid">7879783</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref44">
                <label>44</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Benjamini</surname>
                            <given-names>Y</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Hochberg</surname>
                            <given-names>Y</given-names>
                        </name>
                    </person-group>
                    <article-title>Controlling the False Discovery Rate - a new and powerful approach to multiple testing</article-title>
                    <source>J Roy Stat Soc B</source>
                    <year>1995</year>
                    <volume>57</volume>
                    <issue>1</issue>
                    <fpage>289</fpage>
                    <lpage>300</lpage>
                    <pub-id pub-id-type="doi">10.2307/2346101</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref45">
                <label>45</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Moore</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Cooper</surname>
                            <given-names>G</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Wagner</surname>
                            <given-names>M</given-names>
                        </name>
                    </person-group>
                    <article-title>WSARE: What's strange about recent events?</article-title>
                    <source>J Urban Health</source>
                    <year>2003</year>
                    <volume>80</volume>
                    <issue>1</issue>
                    <fpage>i66</fpage>
                    <lpage>i75</lpage>
                    <pub-id pub-id-type="doi">10.1007/PL00022317</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref46">
                <label>46</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Pelleg</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Yom-Tov</surname>
                            <given-names>E</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Maarek</surname>
                            <given-names>Y</given-names>
                        </name>
                    </person-group>
                    <article-title>Can you believe an anonymous contributor? On truthfulness in Yahoo! Answers</article-title>
                    <year>2012</year>
                    <conf-name>ASE/IEEE International Conference on Social Computing</conf-name>
                    <conf-date>3-6 September 2012</conf-date>
                    <conf-loc>Amsterdam, The Netherlands</conf-loc>
                    <fpage>411</fpage>
                    <lpage>420</lpage>
                    <pub-id pub-id-type="doi">10.1109/SocialCom-PASSAT.2012.13</pub-id>
                </nlm-citation>
            </ref>
        </ref-list>
    </back>
</article>
