<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v19i12e416</article-id>
    <article-id pub-id-type="pmid">29269339</article-id>
    <article-id pub-id-type="doi">10.2196/jmir.8184</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>Estimating the Population Impact of a New Pediatric Influenza Vaccination Program in England Using Social Media Content</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Collier</surname>
          <given-names>Nigel</given-names>
        </name>
      </contrib>
      <contrib contrib-type="editor">
        <name>
          <surname>Conway</surname>
          <given-names>Mike</given-names>
        </name>
      </contrib>
      <contrib contrib-type="editor">
        <name>
          <surname>Limsopatham</surname>
          <given-names>Nut</given-names>
        </name>
      </contrib>
      <contrib contrib-type="editor">
        <name>
          <surname>Lampos</surname>
          <given-names>Vasileios</given-names>
        </name>
      </contrib>
      <contrib contrib-type="editor">
        <name>
          <surname>Culotta</surname>
          <given-names>Aron</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Saha</surname>
          <given-names>Koustuv</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Lei</surname>
          <given-names>Jianbo</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Basch</surname>
          <given-names>Corey</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Wark</surname>
          <given-names>Petra</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes" equal-contrib="yes">
      <name name-style="western">
        <surname>Wagner</surname>
        <given-names>Moritz</given-names>
      </name>
      <degrees>MMath</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>Public Health England</institution>
        <addr-line>61 Colindale Ave</addr-line>
        <addr-line>London, NW9 5EQ</addr-line>
        <country>United Kingdom</country>
        <phone>44 7539078912</phone>
        <email>moritz.wagner.16@ucl.ac.uk</email>
      </address>  
      <xref rid="aff2" ref-type="aff">2</xref>
      <xref rid="aff3" ref-type="aff">3</xref>
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-0204-3736</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2" equal-contrib="yes">
        <name name-style="western">
          <surname>Lampos</surname>
          <given-names>Vasileios</given-names>
        </name>
        <degrees>MSc, PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-8555-2063</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Yom-Tov</surname>
          <given-names>Elad</given-names>
        </name>
        <degrees>BSc, MSc, PhD</degrees>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-2380-4584</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Pebody</surname>
          <given-names>Richard</given-names>
        </name>
        <degrees>MBChB, PhD</degrees>
        <xref rid="aff1" ref-type="aff">1</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9069-2885</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Cox</surname>
          <given-names>Ingemar J</given-names>
        </name>
        <degrees>BSc, PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <xref rid="aff6" ref-type="aff">6</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-6662-417X</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
      <sup>1</sup>
      <institution>Public Health England</institution>
      <addr-line>London</addr-line>
      <country>United Kingdom</country>
    </aff>
    <aff id="aff2">
      <sup>2</sup>
      <institution>University College London</institution>
      <addr-line>London</addr-line>
      <country>United Kingdom</country>
    </aff>
    <aff id="aff3">
      <sup>3</sup>
      <institution>London School of Hygiene and Tropical Medicine</institution>
      <addr-line>London</addr-line>
      <country>United Kingdom</country>
    </aff>
    <aff id="aff4">
    <sup>4</sup>
    <institution>Department of Computer Science</institution>
    <institution>University College London</institution>  
    <addr-line>London</addr-line>
    <country>United Kingdom</country></aff>
    <aff id="aff5">
      <sup>5</sup>
      <institution>Microsoft Research</institution>
      <addr-line>Herzliya</addr-line>
      <country>Israel</country>
    </aff>
    <aff id="aff6">
    <sup>6</sup>
    <institution>Department of Computer Science</institution>
    <institution>University of Copenhagen</institution>  
    <addr-line>Copenhagen</addr-line>
    <country>Denmark</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Moritz Wagner 
      <email>moritz.wagner.16@ucl.ac.uk</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><month>12</month><year>2017</year></pub-date>
    <pub-date pub-type="epub">
      <day>21</day>
      <month>12</month>
      <year>2017</year>
    </pub-date>
    <volume>19</volume>
    <issue>12</issue>
    <elocation-id>e416</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>15</day>
        <month>6</month>
        <year>2017</year>
      </date>
      <date date-type="rev-request">
        <day>1</day>
        <month>9</month>
        <year>2017</year>
      </date>
      <date date-type="rev-recd">
        <day>15</day>
        <month>9</month>
        <year>2017</year>
      </date>
      <date date-type="accepted">
        <day>2</day>
        <month>10</month>
        <year>2017</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Moritz Wagner, Vasileios Lampos, Elad Yom-Tov, Richard Pebody, Ingemar J Cox. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 21.12.2017.</copyright-statement>
    <copyright-year>2017</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://www.jmir.org/2017/12/e416/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>The rollout of a new childhood live attenuated influenza vaccine program was launched in England in 2013, which consisted of a national campaign for all 2 and 3 year olds and several pilot locations offering the vaccine to primary school-age children (4-11 years of age) during the influenza season. The 2014/2015 influenza season saw the national program extended to include additional pilot regions, some of which offered the vaccine to secondary school children (11-13 years of age) as well.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>We utilized social media content to obtain a complementary assessment of the population impact of the programs that were launched in England during the 2013/2014 and 2014/2015 flu seasons. The overall community-wide impact on transmission in pilot areas was estimated for the different age groups that were targeted for vaccination.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>A previously developed statistical framework was applied, which consisted of a nonlinear regression model that was trained to infer influenza-like illness (ILI) rates from Twitter posts originating in pilot (school-age vaccinated) and control (unvaccinated) areas. The control areas were then used to estimate ILI rates in pilot areas, had the intervention not taken place. These predictions were compared with their corresponding Twitter-based ILI estimates.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>Results suggest a reduction in ILI rates of 14% (1-25%) and 17% (2-30%) across all ages in only the primary school-age vaccine pilot areas during the 2013/2014 and 2014/2015 influenza seasons, respectively. No significant impact was observed in areas where two age cohorts of secondary school children were vaccinated.</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>These findings corroborate independent assessments from traditional surveillance data, thereby supporting the ongoing rollout of the program to primary school-age children and providing evidence of the value of social media content as an additional syndromic surveillance tool.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>health intervention</kwd>
      <kwd>influenza</kwd>
      <kwd>vaccination</kwd>
      <kwd>social media</kwd>
      <kwd>Twitter</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In 2012 the Joint Committee on Vaccination and Immunisation recommended the extension of the annual influenza vaccination campaign to include all healthy children aged 2 to 16 years in England [<xref ref-type="bibr" rid="ref1">1</xref>]. This decision was informed by influenza transmission modeling done using an evidence-synthesis approach, showing that vaccination could not only protect the children themselves from infection, but also decrease influenza transmission in the general population. This finding included the indirect protection of at-risk groups, such as people over 65 years of age or those with underlying clinical risk factors [<xref ref-type="bibr" rid="ref2">2</xref>]. The phased rollout of the live attenuated influenza vaccine (LAIV) program began during the 2013/2014 influenza season. In the first season, the program offered vaccinations to all 2 and 3-year-olds throughout England. A number of geographically distinct pilot regions also offered vaccinations to primary school age children (4-11 years of age) to determine the optimal model of delivery to school-age children. For the 2014/2015 influenza season, the program was extended nationally to offer vaccinations to all 2 to 4-year-olds. Pilot locations were added that offered vaccinations to children either (1) of primary school age (<italic>Primary school</italic>; 4-11 years), (2) the first two years of secondary school age (<italic>Secondary school</italic>; 11-13 years), or (3) both (<italic>Primary and Secondary school</italic>; 4-13 years) to determine optimal models of delivery.</p>
      </sec>
      <sec>
        <title>Motivation</title>
        <p>Public Health England (PHE) has been using a variety of surveillance systems to assess the overall population impact of the childhood influenza campaign in children of school-age on influenza epidemiology to validate the direct and indirect effects of vaccinating this age group. The pilot locations for 2014/2015 are of particular interest, as the variation in target groups may offer further insights into the optimal strategies for the national rollout. During the 2014/2015 campaign, most influenza indicators through traditional surveillance systems in both targeted and nontargeted age groups demonstrated a significant reduction in pilot areas that offered the vaccine to primary school age children. However, there was little impact in pilot areas where only two age cohorts of secondary school age children were vaccinated [<xref ref-type="bibr" rid="ref3">3</xref>]. These surveillance indicators were based on health systems ranging from General Practitioners’ consultation rates to excess mortality.</p>
        <p>Whilst such results are important in estimating the intervention’s effects on health care services, online user-generated information offers a complementary data source that can provide additional insights into the impact of such campaigns on the wider community, including those persons that do not consult the health care system. Our study also highlights the potential value of user-generated information in the absence of routine evaluation systems. Internet-based surveillance systems are being viewed as novel logistically and economically viable developments that offer great potential as an extension of traditional surveillance systems [<xref ref-type="bibr" rid="ref4">4</xref>]. Recent research efforts have shown that in combination with <italic>machine learning</italic> techniques, data from social media or search engines can be used to accurately estimate disease-related indicators such as influenza-like illness (ILI) rates [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. These technologies provide health monitoring systems with additional, publicly available, and potentially more timely sources of data for syndromic surveillance. Furthermore, compared to traditional surveillance systems, user-generated content may offer insights about a wider range of the population, including the bottom part of the disease population pyramid (ie, those that do not seek medical attention) [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
        <p>For the 2013/2014 pilot areas, in order to provide further evidence of the community-wide effects of vaccinating children with influenza vaccine, Lampos et al made use of online user-generated content in combination with statistical natural language processing techniques to estimate ILI rates in the population [<xref ref-type="bibr" rid="ref9">9</xref>]. By matching nonvaccinated control areas with pilot areas and using flu-related Twitter posts or Bing search queries from these locations, the impact of the campaign within the <italic>Primary school</italic> age pilot areas was estimated, showing a significant decrease (22% to 33% reduction) in influenza transmission in the general population in these pilot areas compared to corresponding control areas [<xref ref-type="bibr" rid="ref9">9</xref>]. PHE’s estimates also showed evidence of a reduction in influenza transmission in targeted and nontargeted age groups in pilot areas compared to nonpilot areas, based on a variety of influenza indicators during a season dominated by circulation of influenza A(H1N1)pdm09 [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      </sec>
      <sec>
        <title>Aim</title>
        <p>The work in this paper applies the same statistical framework as Lampos et al [<xref ref-type="bibr" rid="ref9">9</xref>] (with a slightly improved supervised learning approach) to Twitter data for the influenza season of 2014/2015. We aim to assess the impact of influenza vaccine pilot trials in school age children on influenza transmission in those pilot areas. The 2014/2015 season was dominated by circulation of influenza A(H3N2) and influenza B. In addition, we examined the impact of vaccinating different target populations, specifically primary and/or secondary school-age children, on influenza rates in the general population. This analysis provides further insights into the most effective strategies for reducing community-wide influenza transmission. This work also aims to reevaluate the hypothesis that a statistical framework based on online user-generated content can form a valid source for more fine-grained influenza surveillance tasks, such as estimating the impact of a targeted intervention. We repeated the analysis for the 2013/2014 LAIV campaign that was previously studied in Lampos et al [<xref ref-type="bibr" rid="ref9">9</xref>], but with revised pilot and control areas, for consistency with our study for the 2014/2015 season.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Sources</title>
        <p>Two data sources were used for the experiments: geo-located Twitter posts related to ILI and official ILI rates provided by the Royal College of General Practitioners (RCGP) [<xref ref-type="bibr" rid="ref12">12</xref>], the latter defining the <italic>ground truth</italic>. In addition, boundary data and population estimates from the Office for National Statistics (ONS) [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] were used to map the vaccine pilot and control areas.</p>
        <sec>
          <title>Twitter Data</title>
          <p>The Twitter data consisted of all exactly geo-located Twitter posts in England from August 29, 2011 to August 30, 2015, which comprise approximately 1% of all tweets made by users in England. This number is a rough estimate based on approximately 20% of the United Kingdom population using Twitter, with 33% of active users assumed to be posting 5 tweets per day [<xref ref-type="bibr" rid="ref15">15</xref>]. Our dataset consists of 350,000 geo-located tweets per day on average. As in Lampos et al [<xref ref-type="bibr" rid="ref9">9</xref>], the same initial list of 36 <italic>n</italic>-grams (phrases with <italic>n</italic> words) related to ILI was created manually. Then, based on frequent cooccurrence with this list in the Twitter time series data, a set of 217 <italic>n</italic>-grams was extracted (<italic>n</italic>&#60;5; see <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>).</p>
          <p>The RCGP ILI rates used for model learning were only available on a weekly basis, so frequency rates of this set of <italic>n</italic>-grams for a period of 7 days prior to any given day were computed, and formed the explanatory variables. To estimate the impact on the pilot areas, <italic>n</italic>-gram frequencies of tweets geo-located in the chosen pilot and control areas during the intervention period were used.</p>
        </sec>
        <sec>
          <title>Official Health Reports</title>
          <p>Weekly ILI estimates were provided by the RCGP, a sentinel network of approximately 100 practices in England, which covers a registered population of approximately 1 million persons [<xref ref-type="bibr" rid="ref12">12</xref>]. These ILI estimates represent the weekly incidence rate of ILI cases/consultations per 100,000 patients registered with eligible practices during that week [<xref ref-type="bibr" rid="ref12">12</xref>]. The data used cover the period from August 29, 2011 to August 30, 2015 for England.</p>
        </sec>
        <sec>
          <title>Pilot and Control Areas</title>
          <p>A total of 140 local authorities implemented vaccinations as part of the pilot program. To create a suitable list of pilot areas for the impact assessment, these areas were combined on a county level, where possible. This list included a large number of <italic>Secondary school</italic> pilot areas (37), so only the most populated ones were considered, whilst ensuring an even geographical distribution throughout the country. The geographical distribution and the areas’ population sizes were defined using ONS boundary data and population estimates of England, respectively [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Of the 7 <italic>Primary and Secondary school</italic> pilot areas, 3 were eliminated due to small size or because they were enclosed within another pilot area. Pilot areas involving special schools were ignored, as these included only a small number of schools and were thus unlikely to provide any significant community-wide benefits. This preprocessing resulted in 6 <italic>Primary school</italic>, 4 <italic>Primary and Secondary school</italic>, and 7 <italic>Secondary school</italic> pilot areas.</p>
          <p>A list of eligible control locations was chosen according to the following criteria: appropriate distance from pilot areas, a moderate population size, and a plausible geographical spread. These criteria resulted in a list of 16 control areas. Nonoverlapping boundary rectangles represented by their North-East and South-West corners were created around the chosen pilot and control areas. The geographical distribution of the pilot and control areas is shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>. <xref ref-type="table" rid="table1">Table 1</xref> lists the pilot areas considered for this study. For a full list of control and pilot areas, see <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Geographical distribution of the pilot and control areas chosen for the study with their corresponding boundary boxes. Control areas with red boxes have a distance of at least 10 km to any pilot area. The “Secondary” and “Primary and Secondary” pilot areas that were excluded from the study are shown without boundary boxes and in a lighter shade of blue and green, respectively. Contains National Statistics and OS data, Crown copyright and database right.</p>
            </caption>
            <graphic xlink:href="jmir_v19i12e416_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Pilot areas considered for this study during the 2014/2015 LAIV program with their respective population size [<xref ref-type="bibr" rid="ref14">14</xref>] and geographical boundary rectangle corner coordinates. Pilot areas that were also used in, or partially overlap with those used in, the 2013/2014 LAIV program are highlighted in italics.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="190"/>
              <col width="270"/>
              <col width="160"/>
              <col width="190"/>
              <col width="190"/>
              <thead>
                <tr valign="top">
                  <td><break/>Location</td>
                  <td><break/>Pilot</td>
                  <td><break/>Population</td>
                  <td>North-East corner<sup>a</sup></td>
                  <td>South-West corner<sup>b</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Thurrock</td>
                  <td>Primary school</td>
                  <td>163,270</td>
                  <td>51.568, 0.551</td>
                  <td>51.448, 0.334</td>
                </tr>
                <tr valign="top">
                  <td><italic>Gateshead</italic></td>
                  <td><italic>Primary school</italic></td>
                  <td><italic>200,505</italic></td>
                  <td><italic>54.984, -1.510</italic></td>
                  <td><italic>54.878, -1.853</italic></td>
                </tr>
                <tr valign="top">
                  <td>South Tyneside</td>
                  <td>Primary school</td>
                  <td>148,740</td>
                  <td>55.011, -1.352</td>
                  <td>54.928, -1.536</td>
                </tr>
                <tr valign="top">
                  <td>Sunderland</td>
                  <td>Primary school</td>
                  <td>276,889</td>
                  <td>54.944, -1.346</td>
                  <td>54.799, -1.569</td>
                </tr>
                <tr valign="top">
                  <td><italic>Cumbria</italic></td>
                  <td><italic>Primary school</italic></td>
                  <td><italic>497,874</italic></td>
                  <td><italic>55.189, -2.159</italic></td>
                  <td><italic>54.040, -3.641</italic></td>
                </tr>
                <tr valign="top">
                  <td><italic>Essex</italic></td>
                  <td><italic>Primary school</italic></td>
                  <td><italic>1,431,953</italic></td>
                  <td><italic>52.093, 1.297</italic></td>
                  <td><italic>51.632, -0.020</italic></td>
                </tr>
                <tr valign="top">
                  <td>Lancashire</td>
                  <td>Secondary school</td>
                  <td>1,184,735</td>
                  <td>54.240, -2.045</td>
                  <td>53.667, -3.085</td>
                </tr>
                <tr valign="top">
                  <td>Birmingham</td>
                  <td>Secondary school</td>
                  <td>1,101,360</td>
                  <td>52.609, -1.729</td>
                  <td>52.381, -2.034</td>
                </tr>
                <tr valign="top">
                  <td>Norfolk</td>
                  <td>Secondary school</td>
                  <td>877,710</td>
                  <td>52.993, 1.745</td>
                  <td>52.355, 0.154</td>
                </tr>
                <tr valign="top">
                  <td>Leeds</td>
                  <td>Secondary school</td>
                  <td>766,399</td>
                  <td>53.946, -1.290</td>
                  <td>53.699, -1.800</td>
                </tr>
                <tr valign="top">
                  <td>Suffolk</td>
                  <td>Secondary school</td>
                  <td>738,512</td>
                  <td>52.550, 1.769</td>
                  <td>51.932, 0.340</td>
                </tr>
                <tr valign="top">
                  <td>Lincolnshire</td>
                  <td>Secondary school</td>
                  <td>731,516</td>
                  <td>53.616, 0.358</td>
                  <td>52.640, -0.821</td>
                </tr>
                <tr valign="top">
                  <td>Shropshire</td>
                  <td>Secondary school</td>
                  <td>310,121</td>
                  <td>52.998, -2.233</td>
                  <td>52.306, -3.236</td>
                </tr>
                <tr valign="top">
                  <td><italic>Bury</italic></td>
                  <td><italic>Primary and Secondary school</italic></td>
                  <td><italic>187,474</italic></td>
                  <td><italic>53.667, -2.234</italic></td>
                  <td><italic>53.512, -2.383</italic></td>
                </tr>
                <tr valign="top">
                  <td>Salford</td>
                  <td>Primary and Secondary school</td>
                  <td>242,040</td>
                  <td>53.542, -2.245</td>
                  <td>53.416, -2.490</td>
                </tr>
                <tr valign="top">
                  <td><italic>Havering</italic></td>
                  <td><italic>Primary and Secondary school</italic><sup>c</sup></td>
                  <td><italic>245,974</italic></td>
                  <td><italic>51.632, 0.334</italic></td>
                  <td><italic>51.484, 0.138</italic></td>
                </tr>
                <tr valign="top">
                  <td><italic>Leicestershire</italic></td>
                  <td><italic>Primary and Secondary school</italic></td>
                  <td><italic>667,905</italic></td>
                  <td><italic>52.948, -0.664</italic></td>
                  <td><italic>52.392, -1.598</italic></td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>Latitude and longitude of the North-East corner of the bounding box</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>Latitude and longitude of the South-West corner of the bounding box</p>
              </fn>
              <fn id="table1fn3">
                <p><sup>c</sup>The secondary school program in Havering included the year 7 cohorts only (11-12 years)</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
      



<sec>
        <title>Statistical Framework</title>
        <p>The following sections provide a brief outline of the statistical framework that was implemented. Apart from a slightly improved supervised learning approach, this framework is based on the work by Lampos et al [<xref ref-type="bibr" rid="ref9">9</xref>], in which it is described and validated in more detail. The method consists of first learning a nonlinear regression model to estimate ILI rates from <italic>n</italic>-grams based on user-generated content (tweets in this case). Thereafter, by making use of inferred ILI rates in matched pilot and control regions, a linear modeling approach was applied to assess the potential impact of the intervention in the pilot areas.</p>
        <sec>
          <title>Estimating Disease Rates Using a Gaussian Process</title>
          <p>The majority of techniques used to acquire infectious disease estimates from user-generated data involve the use of linear regression models [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. Lampos et al showed that nonlinear methods can improve model performance, especially when working with a smaller feature space consisting of varying <italic>n</italic>-gram sizes [<xref ref-type="bibr" rid="ref8">8</xref>]. The authors proposed the use of Gaussian Processes (GPs) to model ILI rates and successfully applied these to Twitter, Google, and Bing data [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. See below for details of the GP model used in this study.</p>
          <p>Let <bold>X</bold>∈ℝ<sup>N×M</sup> be the observation matrix with <italic>N</italic> weeks and <italic>M</italic> frequency rates of <italic>n</italic>-gram features. Then given inputs <bold>x,x'</bold>∈ℝ<sup>M</sup> (representing rows of <bold>X</bold>), a GP can be defined as a statistical distribution for which any finite linear combination of samples is normally distributed and is written as:</p>
          <graphic xlink:href="jmir_v19i12e416_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>Here μ(<bold>x</bold>) and k(<bold>x,x'</bold>) represent the mean and covariance function (or kernel), respectively [<xref ref-type="bibr" rid="ref19">19</xref>]. By assuming that μ(<bold>x</bold><sub>i</sub>)=0 ∀ <italic>i</italic>=1,…, <italic>N</italic>, the distribution is entirely determined by its covariance function. As our core kernel, the sum of two differently parameterized Matérn functions (k<sub>M</sub>) [<xref ref-type="bibr" rid="ref20">20</xref>], with degrees of freedom <italic>v</italic>=3/2 was found to be the most suitable for estimating ILI rates from Twitter data:</p>
          <graphic xlink:href="jmir_v19i12e416_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>where <italic>σ</italic><sub>m</sub> represents the overall level of variance and <italic>l</italic><sub>m</sub> a characteristic length scale. Assuming that different <italic>n</italic>-gram sizes may vary in their usage and are likely to have a more concise semantic interpretation with an increasing <italic>n</italic>, we model them with different kernels. The fact that the sum of covariance functions forms a valid covariance function in itself allows for this and we have:</p>
          <graphic xlink:href="jmir_v19i12e416_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>where <bold><italic>g</italic> </bold><sub>n</sub> represents the features that belong to each <italic>n</italic>-gram category and <italic>C</italic>=3 is the number of <italic>n</italic>-gram categories (3-grams and 4-grams are merged in this particular model). To model noise, we use the sum of a squared exponential:</p>
          <graphic xlink:href="jmir_v19i12e416_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>and a noise function:</p>
          <graphic xlink:href="jmir_v19i12e416_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>(δ is a Kronecker delta function), as defined in [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
          <p>GP regression involves minimizing the negative log-marginal likelihood function:</p>
          <graphic xlink:href="jmir_v19i12e416_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>where <bold>y</bold> denotes the ILI rates time-series, (<bold>K</bold>)<italic><sub>ij</sub></italic>=k(<bold>x</bold><sub>i</sub>,<bold>x</bold><sub>j</sub>) and <bold>μ</bold>=(μ(<bold>x</bold><sub>1</sub>),…,μ(<bold>x</bold><italic><sub>N</sub></italic>)). Once the model is learnt, newly observed feature frequency rates <bold>x<sub>*</sub></bold> result in new ILI rate estimates <bold>y<sub>*</sub></bold> by computing E[<bold>y<sub>*</sub></bold>&#124;<bold>y</bold>,Ω,<bold>x<sub>*</sub></bold>], the mean of the posterior predictive distribution. The performance of the model was measured using a 10-fold cross validation (random temporal splits) on the training set, using the average Pearson correlation (<italic>r</italic>) and the mean absolute error (MAE).</p>
        </sec>
        <sec>
          <title>Estimating the Impact of the LAIV Program</title>
          <p>Once the GP model was trained, the impact of the LAIV campaign in pilot areas could be estimated using the methodology outlined in Lampos et al, Section 3.3 [<xref ref-type="bibr" rid="ref9">9</xref>], which we briefly describe here as well.</p>
          <p>Given a set of pilot and control areas, <italic>n</italic>-gram frequencies of Twitter posts geo-located in those areas are extracted for a period before and during the intervention. ILI rate estimates can then be computed for all areas and supersets of areas using a pretrained GP model and we denote these with <bold>q</bold><sub>v</sub> and <bold>q</bold><sub>c</sub> for pilot and control areas, respectively. By looking at these ILI estimates for a number of weeks, τ={ <italic>t</italic><sub>1</sub>,…, <italic>t<sub>N</sub></italic> }, prior to the intervention, control and pilot locations with similar influenza activity can be matched based on a strong Pearson correlation, <inline-graphic xlink:href="jmir_v19i12e416_fig9.png" mimetype="image" xlink:type="simple"/>. Assuming a linear relationship in ILI rates between locations with similar influenza activity, a linear regression model can be learnt using <inline-graphic xlink:href="jmir_v19i12e416_fig10.png" mimetype="image" xlink:type="simple"/> and <inline-graphic xlink:href="jmir_v19i12e416_fig11.png" mimetype="image" xlink:type="simple"/> (ie, the ILI estimates prior to the intervention in the various matched area pairs):</p>
          <graphic xlink:href="jmir_v19i12e416_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>where <italic>ω,β,ε<sub>i</sub></italic> denote the regression’s weight and intercept, and independent, zero-centered noise, respectively. Using <bold>q</bold><sub>c</sub>, the ILI estimates in the control areas during the intervention, this linear model can then predict the hypothetical ILI rates in pilot locations during the intervention had the intervention not taken place:</p>
          <graphic xlink:href="jmir_v19i12e416_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>where <bold>b</bold>∈ℝ<sup>N</sup> with (<bold>b</bold>)<sub>k</sub>= <italic>β∀k</italic>=1,…, <italic>N</italic>.</p>
          <p>Comparing these hypothetical ILI rates to the ILI rates estimated by the GP model during the intervention allows the impact of the campaign to be estimated. The following measures were applied:</p>
          <graphic xlink:href="jmir_v19i12e416_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <graphic xlink:href="jmir_v19i12e416_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          <p>where <inline-graphic xlink:href="jmir_v19i12e416_fig16.png" mimetype="image" xlink:type="simple"/> denotes the mean value of <bold>q</bold>. Thus, <italic>δ<sub>v</sub></italic> and <italic>θ<sub>v</sub></italic> measure the absolute and relative mean impact of the intervention, respectively. Confidence intervals for these measures are produced using bootstrap sampling [<xref ref-type="bibr" rid="ref21">21</xref>]. This calculation involves sampling with replacement the residuals <italic>ε<sub>i</sub></italic> of the linear regression, adding them to the fitted values, and then running the linear model for these, which produces estimates for <italic>β</italic> and <italic>ω</italic>. These values are then applied to a sampled (with replacement) set of <bold>q</bold><sub>v</sub> and <bold>q</bold><sub>c</sub>. Repeating this procedure 100,000 times creates sets of estimates for <italic>δ<sub>v</sub></italic> and <italic>θ<sub>v</sub></italic> from which we can derive confidence intervals using the 0.025 and 0.975 quantiles, provided that their distributions are unimodal and symmetric. Results are considered statistically significant if absolute values are higher than two standard deviations of the bootstrap estimates [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We present an assessment of the impact of the childhood LAIV campaign during the 2013/2014 and 2014/2015 influenza seasons based on the previously described methodology. The GP model was trained on RCGP ILI rates in England and <xref ref-type="fig" rid="figure2">Figure 2</xref> shows the RCGP ILI rates used, with the preintervention correlation period and the two impact assessment periods highlighted.</p>
      <fig id="figure2" position="float">
        <label>Figure 2</label>
        <caption>
          <p>Weekly influenza-like illness (ILI) rate (per 100,000) provided by the Royal College of General Practitioners (RCGP) in England with the pre-intervention correlation period highlighted in green and the two impact assessment periods (2013/14 and 2014/15 influenza seasons) highlighted in red.</p>
        </caption>
        <graphic xlink:href="jmir_v19i12e416_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <sec>
        <title>Performance of the Supervised Model for Estimating ILI Rates</title>
        <p>A GP regression model was trained using weekly Twitter data geo-located in England from August 29, 2011 to August 30, 2015 and the corresponding RCGP ILI rates. Based on a 10-fold cross validation, an average Pearson correlation <italic>r</italic>=0.84 with a standard deviation of 0.08 and average MAE of 2.42 (weekly ILI rate per 100,000 people) with a standard deviation of 0.52 were measured. This performance is in line with that of the GP model used in the previous impact assessment [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      </sec>
      <sec>
        <title>Impact Estimates of the LAIV Program</title>
        <p>Using the GP model trained on a national level (England), ILI rates for the chosen pilot locations were estimated. This was done for individual pilot locations, the set of all pilot locations, and sets of pilot locations in which the same cohorts were vaccinated (ie, <italic>Primary school</italic>, <italic>Secondary school</italic>). An exhaustive search of all possible combinations of control areas was performed. These combinations of control locations were matched to the sets of pilot locations during a period prior to the start of the LAIV campaign (August 29, 2011 to September 1, 2013) based on similar influenza activity, as measured by Pearson correlation. The 2013/2014 influenza season is not included in this correlation phase, as this involved the vaccination of 2 and 3-year-olds nationally and a number of primary school age pilot areas, which could change the linear relationship between certain control and pilot locations. For each pilot area and set of pilot areas, the most highly correlated combination of control areas was used to then estimate the impact of the LAIV campaign for the 2014/2015 influenza season. There is some overlap with the pilot areas of the previous influenza season, so the same analysis was redone for the 2013/2014 season (in this case with a different set of control areas) so results could be compared to previous studies [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> and <xref ref-type="table" rid="table3">Table 3</xref> show the results for individual pilot locations, and sets of them for the 2014/2015 and 2013/2014 influenza season, respectively. For each area, the tables include the Pearson correlation <italic>r</italic>, the mean and 95% confidence intervals of 100,000 bootstrap estimates of the absolute and relative mean impact δ<sub>v</sub> and θ<sub>v</sub> during the intervention period, the number of control areas chosen <italic>n</italic> (<italic>c</italic>), and the size of the population targeted in the pilot <italic>Pop</italic> (<italic>v</italic>) and matched collection of control <italic>Pop</italic> (<italic>c</italic>) areas. The distribution of the bootstrap estimates was assessed graphically and seemed unimodal. Thus, statistically significant results are based on absolute values being higher than two standard deviations of the bootstrap estimates and are highlighted in italics. In addition, a significant preintervention correlation was necessary for reliable impact estimates, which we defined as being a Pearson correlation &#62;0.60, as was done in the previous study [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Estimates of the impacts of the LAIV pilot program during the 2014/2015 influenza season in individual pilot locations and supersets of them. For each area considered, the precampaign Pearson correlation <italic>r</italic> with chosen control areas, the mean and 95% confidence intervals of the absolute and relative mean impact δ<sub>v</sub> and θ<sub>v</sub> during the intervention period, the number of control areas chosen <italic>n(c)</italic>, and the size of the population targeted in the chosen vaccination <italic>Pop(v)</italic> and control <italic>Pop(c)</italic> areas are presented. Statistically significant results are highlighted in italics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="335"/>
            <col width="50"/>
            <col width="140"/>
            <col width="150"/>
            <col width="50"/>
            <col width="85"/>
            <col width="85"/>
            <thead>
              <tr valign="top">
                <td><break/>Pilot area</td>
                <td><italic>r</italic><sup>a</sup></td>
                <td>δ<sub>v</sub><sup>b</sup></td>
                <td>θ<sub>v</sub><sup>c</sup></td>
                <td><italic>n</italic> (<italic>c</italic>)<sup>d</sup></td>
                <td><italic>Pop</italic> (<italic>v</italic>)<sup>e</sup></td>
                <td><italic>Pop</italic> (<italic>c</italic>)<sup>f</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>All vaccinated</td>
                <td>0.89</td>
                <td>-0.50 (-2.77 to 1.99)</td>
                <td>-4.51 (-25.72 to 22.61)</td>
                <td>10</td>
                <td>9,772,977</td>
                <td>5,066,069</td>
              </tr>
              <tr valign="top">
                <td><italic>All “Primary school”</italic></td>
                <td><italic>0.71</italic></td>
                <td><italic>-1.15 (-2.19 to -0.15)</italic></td>
                <td><italic>-16.97 (-30.09 to -2.42)</italic></td>
                <td><italic>8</italic></td>
                <td><italic>2,719,231</italic></td>
                <td><italic>2,371,367</italic></td>
              </tr>
              <tr valign="top">
                <td>All “Primary and Secondary school”</td>
                <td>0.84</td>
                <td>-0.06 (-1.50 to 1.43)</td>
                <td>-0.30 (-16.71 to 19.36)</td>
                <td>6</td>
                <td>1,097,419</td>
                <td>2,174,854</td>
              </tr>
              <tr valign="top">
                <td>All “Primary school” and “Primary and Secondary school”</td>
                <td>0.85</td>
                <td>-1.35 (-3.37 to 0.66)</td>
                <td>-13.01 (-30.54 to 7.31)</td>
                <td>9</td>
                <td>4,062,624</td>
                <td>3,601,377</td>
              </tr>
              <tr valign="top">
                <td>All “Secondary school”</td>
                <td>0.83</td>
                <td>0.06 (-1.58 to 1.90)</td>
                <td>1.41 (-19.40 to 28.40)</td>
                <td>7</td>
                <td>5,710,353</td>
                <td>4,038,921</td>
              </tr>
              <tr valign="top">
                <td>Cumbria (“Primary school”)</td>
                <td>0.59</td>
                <td>0.04 (-0.24 to 0.33)</td>
                <td>1.07 (-5.75 to 8.17)</td>
                <td>7</td>
                <td>497,874</td>
                <td>3,999,608</td>
              </tr>
              <tr valign="top">
                <td>Essex (“Primary school”)</td>
                <td>0.68</td>
                <td>-0.32 (-1.13 to 0.51)</td>
                <td>-5.91 (-20.56 to 10.58)</td>
                <td>8</td>
                <td>1,431,953</td>
                <td>3,199,730</td>
              </tr>
              <tr valign="top">
                <td><italic>Gateshead (“Primary school”)</italic></td>
                <td><italic>0.59</italic></td>
                <td><italic>-0.39 (-0.74 to -0.04)</italic></td>
                <td><italic>-8.46 (-15.56 to -1.02)</italic></td>
                <td><italic>4</italic></td>
                <td><italic>200,505</italic></td>
                <td><italic>1,551,060</italic></td>
              </tr>
              <tr valign="top">
                <td><italic>South Tyneside (“Primary school”)</italic></td>
                <td><italic>0.34</italic></td>
                <td><italic>0.25 (0.03 to 0.52)</italic></td>
                <td><italic>6.82 (0.81 to 14.07)</italic></td>
                <td><italic>3</italic></td>
                <td><italic>148,740</italic></td>
                <td><italic>1,697,971</italic></td>
              </tr>
              <tr valign="top">
                <td>Sunderland (“Primary school”)</td>
                <td>0.54</td>
                <td>0.12 (-0.05 to 0.32)</td>
                <td>3.20 (-1.38 to 8.38)</td>
                <td>3</td>
                <td>276,889</td>
                <td>1,119,136</td>
              </tr>
              <tr valign="top">
                <td>Thurrock (“Primary school”)</td>
                <td>0.32</td>
                <td>0.04 (-0.14 to 0.23)</td>
                <td>1.01 (-3.56 to 6.24)</td>
                <td>3</td>
                <td>163,270</td>
                <td>753,563</td>
              </tr>
              <tr valign="top">
                <td>Bury (“Primary and Secondary school”)</td>
                <td>0.32</td>
                <td>-0.11 (-0.37 to 0.12)</td>
                <td>-2.60 (-8.94 to 3.13)</td>
                <td>2</td>
                <td>187,474</td>
                <td>893,813</td>
              </tr>
              <tr valign="top">
                <td>Leicestershire (“Primary and Secondary school”)</td>
                <td>0.81</td>
                <td>0.32 (-0.70 to 1.38)</td>
                <td>4.97 (-10.01 to 21.22)</td>
                <td>6</td>
                <td>667,905</td>
                <td>2,756,865</td>
              </tr>
              <tr valign="top">
                <td>Salford (“Primary and Secondary school”)</td>
                <td>0.67</td>
                <td>0.40 (-0.20 to 1.01)</td>
                <td>8.45 (-3.96 to 22.00)</td>
                <td>7</td>
                <td>242,040</td>
                <td>4,183,184</td>
              </tr>
              <tr valign="top">
                <td>Havering (“Primary and Secondary school”-year 7)</td>
                <td>0.48</td>
                <td>-0.03 (-0.35 to 0.31)</td>
                <td>-0.55 (-8.23 to 7.79)</td>
                <td>4</td>
                <td>245,974</td>
                <td>1,742,705</td>
              </tr>
              <tr valign="top">
                <td>Birmingham (“Secondary school”)</td>
                <td>0.79</td>
                <td>0.53 (-0.27 to 1.34)</td>
                <td>10.36 (-4.86 to 27.21)</td>
                <td>10</td>
                <td>1,101,360</td>
                <td>5,435,742</td>
              </tr>
              <tr valign="top">
                <td>Lancashire (“Secondary school”)</td>
                <td>0.65</td>
                <td>0.18 (-0.78 to 1.13)</td>
                <td>3.45 (-13.41 to 21.40)</td>
                <td>8</td>
                <td>1,184,735</td>
                <td>3,463,060</td>
              </tr>
              <tr valign="top">
                <td>Leeds (“Secondary school”)</td>
                <td>0.63</td>
                <td>0.54 (-0.40 to 1.51)</td>
                <td>10.81 (-7.41 to 30.98)</td>
                <td>7</td>
                <td>766,399</td>
                <td>2,731,293</td>
              </tr>
              <tr valign="top">
                <td>Lincolnshire (“Secondary school”)</td>
                <td>0.66</td>
                <td>-0.29 (-0.78 to 0.19)</td>
                <td>-6.09 (-16.20 to 4.25)</td>
                <td>6</td>
                <td>731,516</td>
                <td>1,737,168</td>
              </tr>
              <tr valign="top">
                <td>Norfolk (“Secondary school”)</td>
                <td>0.71</td>
                <td>-0.12 (-0.60 to 0.35)</td>
                <td>-2.31 (-11.55 to 7.25)</td>
                <td>6</td>
                <td>877,710</td>
                <td>2,784,394</td>
              </tr>
              <tr valign="top">
                <td>Shropshire (“Secondary school”)</td>
                <td>0.35</td>
                <td>0.13 (-0.13 to 0.39)</td>
                <td>3.30 (-3.18 to 9.71)</td>
                <td>6</td>
                <td>310,121</td>
                <td>2,833,659</td>
              </tr>
              <tr valign="top">
                <td>Suffolk (“Secondary school”)</td>
                <td>0.59</td>
                <td>0.10 (-0.34 to 0.53)</td>
                <td>2.24 (-7.54 to 12.35)</td>
                <td>5</td>
                <td>738,512</td>
                <td>2,015,339</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup><italic>r</italic>: The precampaign Pearson correlation with the chosen aggregation of control areas</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>δ<sub>v</sub>: The absolute difference in the mean ILI rate during the intervention period</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>θ<sub>v</sub>: The relative difference in the mean ILI rate during the intervention period</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup><italic>n</italic> (<italic>c</italic>): The number of aggregated control areas chosen</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup><italic>Pop</italic> (<italic>v</italic>): The size of the population targeted in the chosen vaccination areas</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup><italic>Pop</italic> (<italic>c</italic>): The size of the population targeted in the chosen aggregation of control areas</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>

        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Estimates of the impacts of the LAIV pilot program during the 2013/2014 influenza season in individual pilot locations and supersets of these locations. For each area considered, the precampaign Pearson correlation <italic>r</italic> with chosen control areas, the mean and 95% confidence intervals of the absolute and relative mean impact δ<sub>v</sub> and θ<sub>v</sub> during the intervention period, the number of control areas chosen <italic>n(c)</italic>, and the size of the population targeted in the chosen vaccination <italic>Pop(v)</italic> and control <italic>Pop(c)</italic> areas are presented. Statistically significant results are highlighted in italics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="335"/>
            <col width="50"/>
            <col width="140"/>
            <col width="150"/>
            <col width="50"/>
            <col width="85"/>
            <col width="85"/>
            <thead>
              <tr valign="top">
                <td><break/>Pilot area</td>
                <td><italic>r</italic><sup>a</sup></td>
                <td>δ<sub>v</sub><sup>b</sup></td>
                <td>θ<sub>v</sub><sup>c</sup></td>
                <td><italic>n</italic> (<italic>c</italic>)<sup>d</sup></td>
                <td><italic>Pop</italic> (<italic>v</italic>)<sup>e</sup></td>
                <td><italic>Pop</italic> (<italic>c</italic>)<sup>f</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td><italic>All vaccinated (Primary school)</italic></td>
                <td><italic>0.82</italic></td>
                <td><italic>-1.03 (-2.00 to -0.10)</italic></td>
                <td><italic>-13.77 (-25.01 to -1.45)</italic></td>
                <td><italic>9</italic></td>
                <td><italic>3,231,685</italic></td>
                <td><italic>3,601,377</italic></td>
              </tr>
              <tr valign="top">
                <td>Leicestershire (Primary school)</td>
                <td>0.81</td>
                <td>-0.28 (-1.02 to 0.47)</td>
                <td>-4.44 (-15.93 to 7.95)</td>
                <td>6</td>
                <td>667,905</td>
                <td>2,756,865</td>
              </tr>
              <tr valign="top">
                <td>Essex (Primary school)</td>
                <td>0.68</td>
                <td>0.34 (-0.30 to 1.12)</td>
                <td>7.45 (-6.41 to 24.32)</td>
                <td>8</td>
                <td>1,431,953</td>
                <td>3,199,730</td>
              </tr>
              <tr valign="top">
                <td>Gateshead (Primary school)</td>
                <td>0.59</td>
                <td>0.38 (-0.06 to 0.85)</td>
                <td>9.11 (-1.40 to 20.76)</td>
                <td>4</td>
                <td>200,505</td>
                <td>1,551,060</td>
              </tr>
              <tr valign="top">
                <td>Cumbria (Primary school)</td>
                <td>0.59</td>
                <td>0.36 (-0.00 to 0.75)</td>
                <td>9.12 (-0.07 to 19.11)</td>
                <td>7</td>
                <td>497,874</td>
                <td>3,999,608</td>
              </tr>
              <tr valign="top">
                <td>Havering (Primary school)</td>
                <td>0.48</td>
                <td>0.15 (-0.19 to 0.52)</td>
                <td>3.80 (-4.99 to 13.43)</td>
                <td>4</td>
                <td>245,974</td>
                <td>1,742,705</td>
              </tr>
              <tr valign="top">
                <td>Bury (Primary school)</td>
                <td>0.32</td>
                <td>-0.09 (-0.34 to 0.14)</td>
                <td>-2.40 (-8.44 to 3.64)</td>
                <td>2</td>
                <td>187,474</td>
                <td>893,813</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup><italic>r</italic>: The precampaign Pearson correlation with the chosen aggregation of control areas</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>δ<sub>v</sub>: The absolute difference in the mean ILI rate during the intervention period</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>θ<sub>v</sub>: The relative difference in the mean ILI rate during the intervention period</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup><italic>n</italic> (<italic>c</italic>): The number of aggregated control areas chosen</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup><italic>Pop</italic> (<italic>v</italic>): The size of the population targeted in the chosen vaccination areas</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup><italic>Pop</italic> (<italic>c</italic>): The size of the population targeted in the chosen aggregation of control areas</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>For the 2014/2015 influenza season, correlations ranged from 0.32 to 0.89, and pilot areas with larger populations tended to have more control areas, larger populations of control areas, and higher Pearson correlations. The only significant impact was observed in the <italic>Primary school</italic> age pilot areas, for which the results suggest that during the 2014/2015 influenza season the mean ILI rate was reduced by 16.97% (95% CI 2.42-30.09). For the individual locations, Gateshead and South Tyneside did show significant results, but their precampaign correlations were 0.59 and 0.34, respectively; both were less than the predefined threshold of 0.60, which makes their impact estimates possibly less reliable.</p>
        <p>The correlations for the 2013/2014 influenza season ranged from 0.32 to 0.82, and whilst none of the individual locations demonstrated significant results, all pilots together estimated a statistically significant impact of a 13.77% (95% CI 1.45-25.01) reduction in the mean ILI rate during that season. Note that for the 2013/2014 season, the primary school-age vaccination was the only program implemented across all pilot areas.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>By using social media content to assess the impact of the childhood influenza pilot program in England in 2013/2014 and 2014/2015, statistically significant results suggest a reduction in the mean ILI rate of approximately 17% (<xref ref-type="table" rid="table2">Table 2</xref>, row 2, column 4) across all ages in <italic>Primary school</italic> age pilot areas only during the 2014/2015 influenza season and 14% (<xref ref-type="table" rid="table3">Table 3</xref>, row 1, column 4) in the aggregation of <italic>Primary school</italic> age vaccinated areas during the 2013/2014 influenza season.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Both impact estimates are in line with results from independent studies by PHE that used traditional surveillance systems [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. For the 2014/2015 season, however, the impact results are generally lower than expected with only a few statistically significant results. For example, it was expected that the <italic>Primary and Secondary school</italic> or the combined set of <italic>Primary school</italic> and <italic>Primary and Secondary school</italic> pilot locations would yield significant impacts, as they included a similar program to that in the <italic>Primary school</italic> pilot areas. Looking at the boundary boxes in more detail (<xref ref-type="fig" rid="figure1">Figure 1</xref>) shows that of the 4 <italic>Primary and Secondary school</italic> pilot areas, Leicestershire and Salford both include substantial parts of nonpilot areas, which is likely to have biased their results and underestimated effect sizes. The lack of statistically significant results across all individual locations is possibly due to the sparsity of the Twitter data available. For example, the individual <italic>Primary school</italic> pilot areas did not yield statistically significant impact estimates (with the exception of Gateshead and South Tyneside, which did show significant results, but their preintervention correlations were below the 0.60 threshold), whilst the aggregation of all <italic>Primary school</italic> areas did.</p>
        <p>The previous study by Lampos et al implemented a similar approach using Twitter and Bing data to assess the impact of the LAIV pilots during the 2013/2014 influenza season [<xref ref-type="bibr" rid="ref9">9</xref>]. This study estimated the impact to be approximately 33% for the aggregation of all pilot locations based on Twitter data, which is more than double what was found in this study. The discrepancy between these results is most likely due to two factors. First, the pilot areas used for the 2013/2014 season in the present study are slightly larger than those in the previous one, as some of the reused pilot areas have been expanded. This issue particularly applies to the boundary boxes for Leicestershire and Essex, as the previous study only included parts of these areas. Second, apart from one control area (Liverpool), most of the previous control areas were part of the 2014/2015 pilot program, and thus not reusable. New control areas were therefore selected, which may explain the discrepancy in impact estimates. Nevertheless, given that both studies exhibited a significant impact, the methodology produces qualitatively consistent results for the same influenza season, even when using a different set of control and pilot areas.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>There is a strong indication that the primary school age vaccination program has the potential to be an effective strategy in reducing influenza transmission in the general population. This notion supports the ongoing rollout of the campaign for primary school children. For a secondary school-only vaccination program offering the vaccine to just two-year cohorts (and not to all children of secondary age), there is no clear evidence of any population-wide effect. Both of these conclusions are in line with findings from previous studies and complement traditional surveillance sources in exhibiting community-wide effects of the LAIV pilot campaign [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref23">23</xref>].</p>
        <p>Most current influenza surveillance schemes rely on established health systems. Although these schemes provide important information on health care-related burden of disease and potential reductions due to vaccine impact, several provide less direct insight into community-wide transmission. User-generated content from social media offers rapid access to a larger range of the population, which has the potential of including a wider community (ie, including those that do not seek medical attention) and thus offers a valuable complementary source for the surveillance and evaluation of public health programs.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are several potential limitations in this study. Work is still needed to refine the methods used to deal with issues such as noise, model and data biases, and the fact that estimates from user-generated content are not directly based on actual ILI cases. More advanced natural language processing techniques may deliver more accurate results [<xref ref-type="bibr" rid="ref24">24</xref>]. The choice of control areas requires further refinement; we are seeking an even geographical distribution as well as an adequate distance from pilot areas to avoid regional biases, and to isolate the potential impact observed in pilot areas, respectively. Furthermore, the methodology is highly dependent on the quantity and type of user-generated data that is available, as this determines the accuracy and interpretation of the ILI rate estimates. The majority of Twitter users, for example, are between the ages of 15 and 44 years with a higher proportion situated in urban/suburban areas [<xref ref-type="bibr" rid="ref25">25</xref>]. This factor may skew results towards illness in certain demographic groups. The current framework conducts ILI rate modeling by training on syndromic surveillance data (from RCGP), such that biases that are found there are also passed on to the models. Furthermore, even if these biases can be avoided, there is an issue that no definite ground truth exists to allow for a proper verification.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>Future work could aim at moving towards unsupervised models that do not depend on traditional surveillance sources for training purposes. These models could produce their own, independent ILI indicators based solely on user-generated content with the potential of being able to tap into the bottom part of the disease population pyramid [<xref ref-type="bibr" rid="ref10">10</xref>]. Inference of the demographics of users, such as age [<xref ref-type="bibr" rid="ref26">26</xref>], socioeconomic status [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], or severity of disease [<xref ref-type="bibr" rid="ref29">29</xref>] could be another focus of forthcoming work. Pebody et al showed that for both influenza seasons the impact of the pilot program was lower as influenza end-points of infection became more severe, which is an insight that the current modeling framework is unable to pick up on [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. With suitable data access in the future, this framework has the potential of assessing the impact of intervention programs whose uptake is variable. The applicability of this framework extends beyond influenza to a number of health interventions, thereby allowing for a timely and potentially cost-effective complement to the collection of traditional surveillance data.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>The list of the 217 <italic>n</italic>-grams used as features in our predictive models for ILI rates.</p>
        <media xlink:href="jmir_v19i12e416_app1.pdf" xlink:title="PDF File (Adobe PDF File), 50KB"/>
      </app>
      <app id="app2">
        <title>Multimedia Appendix 2</title>
        <p>A table of the pilot and control areas chosen with their respective population size, distance to closest pilot areas, and geographical boundary rectangle corner coordinates.</p>
        <media xlink:href="jmir_v19i12e416_app2.pdf" xlink:title="PDF File (Adobe PDF File), 76KB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">GP</term>
          <def>
            <p>Gaussian Process</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ILI</term>
          <def>
            <p>influenza-like illness</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LAIV</term>
          <def>
            <p>live attenuated influenza vaccine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">MAE</term>
          <def>
            <p>mean absolute error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ONS</term>
          <def>
            <p>Office for National Statistics</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PHE</term>
          <def>
            <p>Public Health England</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RCGP</term>
          <def>
            <p>Royal College of General Practitioners</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work has been funded by the EPSRC through i-sense (<italic>Early-Warning Sensing Systems for Infectious Diseases</italic>; EP/K031953/1). We would also like to thank the RCGP for providing anonymized and aggregate syndromic surveillance data.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>VL, IJC, RP, EY-T, and MW conceived the general concept of this research; EY-T provided the Twitter data; VL designed the models; VL and MW performed the experiments; MW and VL wrote the paper; all coauthors reviewed and commented on the manuscript, and approved the final version.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
        <source>Joint Committee on Vaccination and Immunisation</source>  
        <year>2011</year>  
        <month>10</month>  
        <day>5</day>  
        <access-date>2017-06-15</access-date>
        <comment>Minute of the meeting held on Wednesday 5 October 2011 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://webarchive.nationalarchives.gov.uk/20120907090205/http://www.dh.gov.uk/prod_consum_dh/groups/dh_digitalassets/@dh/@ab/documents/digitalasset/dh_133598.pdf">http://webarchive.nationalarchives.gov.uk/20120907090205/http://www.dh.gov.uk/prod_consum_dh/groups/dh_digitalassets/@dh/@ab/documents/digitalasset/dh_133598.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6rEtwOgu3"/></comment> </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Baguelin</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Flasche</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Camacho</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Demiris</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Miller</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Edmunds</surname>
            <given-names>WJ</given-names>
          </name>
        </person-group>
        <article-title>Assessing optimal target populations for influenza vaccination programmes: an evidence synthesis and modeling study</article-title>
        <source>PLoS Med</source>  
        <year>2013</year>  
        <month>10</month>  
        <volume>10</volume>  
        <issue>10</issue>  
        <fpage>e1001527</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pmed.1001527"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pmed.1001527</pub-id>
        <pub-id pub-id-type="medline">24115913</pub-id>
        <pub-id pub-id-type="pii">PMEDICINE-D-13-00884</pub-id>
        <pub-id pub-id-type="pmcid">PMC3793005</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pebody</surname>
            <given-names>RG</given-names>
          </name>
          <name name-style="western">
            <surname>Green</surname>
            <given-names>HK</given-names>
          </name>
          <name name-style="western">
            <surname>Andrews</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Boddington</surname>
            <given-names>NL</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Yonova</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Ellis</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Steinberger</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Donati</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Elliot</surname>
            <given-names>AJ</given-names>
          </name>
          <name name-style="western">
            <surname>Hughes</surname>
            <given-names>HE</given-names>
          </name>
          <name name-style="western">
            <surname>Pathirannehelage</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Mullett</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>GE</given-names>
          </name>
          <name name-style="western">
            <surname>de Lusignan</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Zambon</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Uptake and impact of vaccinating school age children against influenza during a season with circulation of drifted influenza A and B strains, England, 2014/15</article-title>
        <source>Euro Surveill</source>  
        <year>2015</year>  
        <volume>20</volume>  
        <issue>39</issue>  
        <fpage>30029</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.eurosurveillance.org/ViewArticle.aspx?ArticleId=21256"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2807/1560-7917.ES.2015.20.39.30029</pub-id>
        <pub-id pub-id-type="medline">26537222</pub-id>
        <pub-id pub-id-type="pii">30029</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Milinovich</surname>
            <given-names>GJ</given-names>
          </name>
          <name name-style="western">
            <surname>Williams</surname>
            <given-names>GM</given-names>
          </name>
          <name name-style="western">
            <surname>Clements</surname>
            <given-names>AC</given-names>
          </name>
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <article-title>Internet-based surveillance systems for monitoring emerging infectious diseases</article-title>
        <source>Lancet Infect Dis</source>  
        <year>2014</year>  
        <month>02</month>  
        <volume>14</volume>  
        <issue>2</issue>  
        <fpage>160</fpage>  
        <lpage>168</lpage>  
        <pub-id pub-id-type="doi">10.1016/S1473-3099(13)70244-5</pub-id>
        <pub-id pub-id-type="medline">24290841</pub-id>
        <pub-id pub-id-type="pii">S1473-3099(13)70244-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ginsberg</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Mohebbi</surname>
            <given-names>MH</given-names>
          </name>
          <name name-style="western">
            <surname>Patel</surname>
            <given-names>RS</given-names>
          </name>
          <name name-style="western">
            <surname>Brammer</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Smolinski</surname>
            <given-names>MS</given-names>
          </name>
          <name name-style="western">
            <surname>Brilliant</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>Detecting influenza epidemics using search engine query data</article-title>
        <source>Nature</source>  
        <year>2009</year>  
        <month>02</month>  
        <day>19</day>  
        <volume>457</volume>  
        <issue>7232</issue>  
        <fpage>1012</fpage>  
        <lpage>1014</lpage>  
        <pub-id pub-id-type="doi">10.1038/nature07634</pub-id>
        <pub-id pub-id-type="medline">19020500</pub-id>
        <pub-id pub-id-type="pii">nature07634</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Cristianini</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Nowcasting events from the social web with statistical learning</article-title>
        <source>ACM Trans Intell Syst Technol</source>  
        <year>2012</year>  
        <month>09</month>  
        <day>01</day>  
        <volume>3</volume>  
        <issue>4</issue>  
        <fpage>1</fpage>  
        <lpage>22</lpage>  
        <pub-id pub-id-type="doi">10.1145/2337542.2337557</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Paul</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Dredze</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Broniatowski</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Twitter improves influenza forecasting</article-title>
        <source>PLoS Curr</source>  
        <year>2014</year>  
        <volume>6</volume>  
        <fpage>1</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.doi.org/10.1371/currents.outbreaks.90b9ed0f59bae4ccaa683a39865d9117"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/currents.outbreaks.90b9ed0f59bae4ccaa683a39865d9117</pub-id>
        <pub-id pub-id-type="medline">25642377</pub-id>
        <pub-id pub-id-type="pmcid">PMC4234396</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Miller</surname>
            <given-names>AC</given-names>
          </name>
          <name name-style="western">
            <surname>Crossan</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Stefansen</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>Advances in nowcasting influenza-like illness rates using search query logs</article-title>
        <source>Sci Rep</source>  
        <year>2015</year>  
        <month>08</month>  
        <day>03</day>  
        <volume>5</volume>  
        <fpage>12760</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/srep12760"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1038/srep12760</pub-id>
        <pub-id pub-id-type="medline">26234783</pub-id>
        <pub-id pub-id-type="pii">srep12760</pub-id>
        <pub-id pub-id-type="pmcid">PMC4522652</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Yom-Tov</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Pebody</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Cox</surname>
            <given-names>IJ</given-names>
          </name>
        </person-group>
        <article-title>Assessing the impact of a health intervention via user-generated Internet content</article-title>
        <source>Data Min Knowl Disc</source>  
        <year>2015</year>  
        <month>7</month>  
        <day>2</day>  
        <volume>29</volume>  
        <issue>5</issue>  
        <fpage>1434</fpage>  
        <lpage>1457</lpage>  
        <pub-id pub-id-type="doi">10.1007/s10618-015-0427-9</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gibbons</surname>
            <given-names>CL</given-names>
          </name>
          <name name-style="western">
            <surname>Mangen</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Plass</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Havelaar</surname>
            <given-names>AH</given-names>
          </name>
          <name name-style="western">
            <surname>Brooke</surname>
            <given-names>RJ</given-names>
          </name>
          <name name-style="western">
            <surname>Kramarz</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Peterson</surname>
            <given-names>KL</given-names>
          </name>
          <name name-style="western">
            <surname>Stuurman</surname>
            <given-names>AL</given-names>
          </name>
          <name name-style="western">
            <surname>Cassini</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Fèvre</surname>
            <given-names>EM</given-names>
          </name>
          <name name-style="western">
            <surname>Kretzschmar</surname>
            <given-names>MEE</given-names>
          </name>
          <collab>Burden of Communicable diseases in Europe (BCoDE) consortium</collab>
        </person-group>
        <article-title>Measuring underreporting and under-ascertainment in infectious disease datasets: a comparison of methods</article-title>
        <source>BMC Public Health</source>  
        <year>2014</year>  
        <month>02</month>  
        <day>11</day>  
        <volume>14</volume>  
        <fpage>147</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24517715"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1471-2458-14-147</pub-id>
        <pub-id pub-id-type="medline">24517715</pub-id>
        <pub-id pub-id-type="pii">1471-2458-14-147</pub-id>
        <pub-id pub-id-type="pmcid">PMC4015559</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pebody</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Green</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Andrews</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Boddington</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Bawa</surname>
            <given-names>Z</given-names>
          </name>
        </person-group>
        <article-title>Uptake and impact of a new live attenuated influenza vaccine programme in England: early results of a pilot in primary school-age children, 2013/14 influenza season</article-title>
        <source>Euro Surveill</source>  
        <year>2014</year>  
        <volume>19</volume>  
        <issue>22</issue>  
        <fpage>20823</fpage>  
        <pub-id pub-id-type="doi">10.2807/1560-7917.es2014.19.22.20823</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Correa</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hinton</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>McGovern</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>van Vlymen</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Yonova</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Jones</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>de Lusignan</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Royal College of General Practitioners Research and Surveillance Centre (RCGP RSC) sentinel network: a cohort profile</article-title>
        <source>BMJ Open</source>  
        <year>2016</year>  
        <month>04</month>  
        <day>20</day>  
        <volume>6</volume>  
        <issue>4</issue>  
        <fpage>e011092</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://bmjopen.bmj.com/cgi/pmidlookup?view=long&#38;pmid=27098827"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/bmjopen-2016-011092</pub-id>
        <pub-id pub-id-type="medline">27098827</pub-id>
        <pub-id pub-id-type="pii">bmjopen-2016-011092</pub-id>
        <pub-id pub-id-type="pmcid">PMC4838708</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>Office for National Statistics</collab>
        </person-group>
        <source>UK Data Service</source>  
        <year>2011</year>  
        <comment>2011 Census: boundary data (England and Wales) data collection 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://census.ukdataservice.ac.uk/get-data/boundary-data.aspx">http://census.ukdataservice.ac.uk/get-data/boundary-data.aspx</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6rEuELJCJ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <collab>Office for National Statistics</collab>
        </person-group>
        <source>Annual mid-year population estimates</source>  
        <year>2015</year>  
        <access-date>2017-06-15</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/bulletins/annualmidyearpopulationestimates/2015-06-25">http://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/bulletins/annualmidyearpopulationestimates/2015-06-25</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="6rEvoMShY"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rose</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <source>Rosemcgrory.co.uk</source>  
        <year>2016</year>  
        <access-date>2017-06-15</access-date>
        <comment>UK Social Media Statistics for 2016 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.rosemcgrory.co.uk/2016/01/04/social-media-statistics-2016/">http://www.rosemcgrory.co.uk/2016/01/04/social-media-statistics-2016/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6rEw81MAf"/></comment> </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Cristianini</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Tracking the influenza pandemic by monitoring the social web</article-title>
        <source>2nd International Workshop on Cognitive Information Processing</source>  
        <year>2010</year>  
        <conf-name>2nd International Workshop on Cognitive Information Processing</conf-name>
        <conf-date>2010</conf-date>
        <conf-loc>Elba Island, Italy</conf-loc>
        <fpage>411</fpage>  
        <lpage>416</lpage>  
        <pub-id pub-id-type="doi">10.1109/CIP.2010.5604088</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>De Bie</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Cristianini</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Flu Detector - tracking epidemics on Twitter</article-title>
        <source>LNCS</source>  
        <year>2010</year>  
        <volume>6323</volume>  
        <fpage>599</fpage>  
        <lpage>602</lpage>  
        <pub-id pub-id-type="doi">10.1007/978-3-642-15939-8_42</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Culotta</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Lightweight methods to estimate influenza rates and alcohol sales volume from Twitter messages</article-title>
        <source>Lang Resources &#38; Evaluation</source>  
        <year>2012</year>  
        <month>5</month>  
        <day>13</day>  
        <volume>47</volume>  
        <issue>1</issue>  
        <fpage>217</fpage>  
        <lpage>238</lpage>  
        <pub-id pub-id-type="doi">10.1007/s10579-012-9185-0</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rasmussen</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Williams</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <source>Gaussian processes for machine learning</source>  
        <year>2006</year>  
        <publisher-loc>Cambridge, MA</publisher-loc>
        <publisher-name>MIT Press</publisher-name></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Matérn</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <source>Spatial variation</source>  
        <year>1986</year>  
        <publisher-loc>Berlin</publisher-loc>
        <publisher-name>Springer-Verlag</publisher-name></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Efron</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Tibshirani</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <source>An introduction to the bootstrap</source>  
        <year>1993</year>  
        <publisher-loc>New York</publisher-loc>
        <publisher-name>Chapman &#38; Hall</publisher-name></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lambert</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Pregibon</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Online effects of offline ads</article-title>
        <year>2008</year>  
        <conf-name>Proceedings of the 2nd International Workshop on Data Mining and Audience Intelligence for Advertising - ADKDD '08</conf-name>
        <conf-date>2008</conf-date>
        <conf-loc>Las Vegas, Nevada, USA</conf-loc>
        <pub-id pub-id-type="doi">10.1145/1517472.1517474</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
        <source>Public Health England</source>  
        <year>2014</year>  
        <access-date>2017-06-15</access-date>
        <comment>Surveillance of influenza and other respiratory viruses in the United Kingdom: winter 2014 to 2015 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/429617/Annualreport_March2015_ver4.pdf">https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/429617/Annualreport_March2015_ver4.pdf</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6rEyjgzcr"/></comment> </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Zou</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Cox</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <article-title>Enhancing feature selection using word embeddings</article-title>
        <year>2017</year>  
        <conf-name>Proceedings of the 26th International Conference on World Wide Web</conf-name>
        <conf-date>2017</conf-date>
        <conf-loc>Perth, Australia</conf-loc>
        <fpage>695</fpage>  
        <lpage>704</lpage>  
        <pub-id pub-id-type="doi">10.1145/3038912.3052622</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Duggan</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ellison</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Lampe</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Lenhart</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Madden</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <source>Pew Research Center: Internet, Science &#38; Tech</source>  
        <year>2015</year>  
        <access-date>2017-09-11</access-date>
        <comment>Demographics of Key Social Networking Platforms Internet 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.pewinternet.org/2015/01/09/demographics-of-key-social-networking-platforms-2/#twitter">https://www.pewinternet.org/2015/01/09/demographics-of-key-social-networking-platforms-2/#twitter</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6tOVyGVDv"/></comment> </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rao</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Yarowsky</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Shreevats</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Gupta</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Classifying latent user attributes in Twitter</article-title>
        <year>2010</year>  
        <conf-name>Proceedings of the 2nd International Workshop on Search and Mining User-Generated Contents</conf-name>
        <conf-date>2010</conf-date>
        <conf-loc>Toronto, ON, Canada</conf-loc>
        <fpage>37</fpage>  
        <lpage>44</lpage>  
        <pub-id pub-id-type="doi">10.1145/1871985.1871993</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Preoţiuc-Pietro</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Aletras</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>An analysis of the user occupational class through Twitter content</article-title>
        <source>Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing</source>  
        <year>2015</year>  
        <conf-name>Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics</conf-name>
        <conf-date>2015</conf-date>
        <conf-loc>Beijing, China</conf-loc>
        <fpage>1754</fpage>  
        <lpage>1764</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.aclweb.org/anthology/P15-1169"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Aletras</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Geyti</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Zou</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Cox</surname>
            <given-names>I</given-names>
          </name>
        </person-group>
        <article-title>Inferring the socioeconomic status of social media users based on behaviour and language</article-title>
        <year>2016</year>  
        <conf-name>Proceedings of the 38th European Conference on Information Retrieval</conf-name>
        <conf-date>2016</conf-date>
        <conf-loc>Padua, Italy</conf-loc>
        <fpage>689</fpage>  
        <lpage>695</lpage>  
        <pub-id pub-id-type="doi">10.1007/978-3-319-30671-1_54</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yom-Tov</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Johansson-Cox</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Lampos</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Hayward</surname>
            <given-names>AC</given-names>
          </name>
        </person-group>
        <article-title>Estimating the secondary attack rate and serial interval of influenza-like illnesses using social media</article-title>
        <source>Influenza Other Respir Viruses</source>  
        <year>2015</year>  
        <month>07</month>  
        <volume>9</volume>  
        <issue>4</issue>  
        <fpage>191</fpage>  
        <lpage>199</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.doi.org/10.1111/irv.12321"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1111/irv.12321</pub-id>
        <pub-id pub-id-type="medline">25962320</pub-id>
        <pub-id pub-id-type="pmcid">PMC4474495</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
