<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v24i8e38319</article-id>
      <article-id pub-id-type="pmid">36006693</article-id>
      <article-id pub-id-type="doi">10.2196/38319</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Assessing Social Media Data as a Resource for Firearm Research: Analysis of Tweets Pertaining to Firearm Deaths</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kim</surname>
            <given-names>Yoonsang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Weichelt</surname>
            <given-names>Bryan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jordan</surname>
            <given-names>Lisa</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Singh</surname>
            <given-names>Lisa</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>Massive Data Institute</institution>
            <institution>Georgetown University</institution>
            <addr-line>3700 O Street, NW</addr-line>
            <addr-line>Washington, DC, 20057</addr-line>
            <country>United States</country>
            <phone>1 2026879253</phone>
            <email>lisa.singh@georgetown.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8300-2970</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Gresenz</surname>
            <given-names>Carole Roan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7381-7914</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Yanchen</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7822-7163</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>Sonya</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9651-7236</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>Massive Data Institute</institution>
        <institution>Georgetown University</institution>
        <addr-line>Washington, DC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>McCourt School of Public Policy</institution>
        <institution>School of Health</institution>
        <institution>Georgetown University</institution>
        <addr-line>Washington, DC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Computer Science</institution>
        <institution>Georgetown University</institution>
        <addr-line>Washington, DC</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Lisa Singh <email>lisa.singh@georgetown.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>25</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>24</volume>
      <issue>8</issue>
      <elocation-id>e38319</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>28</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>7</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Lisa Singh, Carole Roan Gresenz, Yanchen Wang, Sonya Hu. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 25.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2022/8/e38319" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Historic constraints on research dollars and reliable information have limited firearm research. At the same time, interest in the power and potential of social media analytics, particularly in health contexts, has surged.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study is to contribute toward the goal of establishing a foundation for how social media data may best be used, alone or in conjunction with other data resources, to improve the information base for firearm research.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We examined the value of social media data for estimating a firearm outcome for which robust benchmark data exist—specifically, firearm mortality, which is captured in the National Vital Statistics System (NVSS). We hand curated tweet data from the Twitter application programming interface spanning January 1, 2017, to December 31, 2018. We developed machine learning classifiers to identify tweets that pertain to firearm deaths and develop estimates of the volume of Twitter firearm discussion by month. We compared within-state variation over time in the volume of tweets pertaining to firearm deaths with within-state trends in NVSS-based estimates of firearm fatalities using Pearson linear correlations.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The correlation between the monthly number of firearm fatalities measured by the NVSS and the monthly volume of tweets pertaining to firearm deaths was weak (median 0.081) and highly dispersed across states (range –0.31 to 0.535). The median correlation between month-to-month changes in firearm fatalities in the NVSS and firearm deaths discussed in tweets was moderate (median 0.30) and exhibited less dispersion among states (range –0.06 to 0.69).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our findings suggest that Twitter data may hold value for tracking dynamics in firearm-related outcomes, particularly for relatively populous cities that are identifiable through location mentions in tweet content. The data are likely to be particularly valuable for understanding firearm outcomes not currently measured, not measured well, or not measurable through other available means. This research provides an important building block for future work that continues to develop the usefulness of social media data for firearm research.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>firearms</kwd>
        <kwd>fatalities</kwd>
        <kwd>Twitter</kwd>
        <kwd>firearm research</kwd>
        <kwd>social media data</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Motivation</title>
        <p>Firearm violence is a major and costly public health burden in the United States [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>], and constraints on the availability of research dollars and reliable information to support firearm research have imposed limits on the ability to gather scientific evidence on effective gun policy [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. At the same time, interest in the power and potential of social media analytics in public health contexts has surged. Several aspects of social media data have heightened their promise as a resource, including the fact that the data are inexpensive to obtain compared with survey data; provide access to continuous, automated, and near–real-time monitoring; and are passively collected in a naturalistic setting as part of an individual’s day-to-day life, eliminating biases inherent to sampling procedures, questionnaires, and recall [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Such data are, of course, not without their own methodological challenges and limitations, and practices for their ethical and meaningful use are evolving [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>].</p>
        <p>To date, such data have been deployed in firearm-related research in several ways, including to record narratives, sentiment, and emotion around shooting events [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]; characterize gun advertisements on social media [<xref ref-type="bibr" rid="ref21">21</xref>]; and reflect opinions on gun policies and gun control [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. In this paper, we take up the question of how social media data may contribute to understanding firearm-related outcomes. We identify methodological approaches, challenges, and limitations associated with using social media data for understanding a specific firearm outcome—firearm mortality—for which a benchmark measure for comparison is available from a traditional data source. The analysis of firearm mortality is intended to serve as a test of the potential utility of social media data for understanding firearm outcomes not currently measured, not measured well, or not measurable through other available means.</p>
      </sec>
      <sec>
        <title>Assessing the Usefulness of Twitter Data</title>
        <p>Specifically, we assessed the usefulness of Twitter data for understanding firearm mortality. Twitter is an online microblogging platform that has &#62;206 million daily active users worldwide and &#62;77 million daily active users in the United States [<xref ref-type="bibr" rid="ref24">24</xref>]. A key feature of Twitter is its short format: members can only post messages, known as <italic>tweets</italic>, of up to 280 characters. We developed machine learning (ML) classifiers for identifying tweets that pertain to firearm fatalities and compared measures of firearm-fatality discussion volume to firearm-fatality estimates by state from the National Vital Statistics System (NVSS). The NVSS represents one of the few sources of US health-related data with consistently collected and reliable information on a specific gun outcome measured by geographic area. Our goal was to begin to establish a foundation for how social media data may be used alone or in conjunction with other data resources, such as through data-blending techniques, to improve the information base on which firearm research relies.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethics Approval</title>
        <p>The institutional review board of Georgetown University reviewed our submission, STUDY00002288, and determined the study to be exempt.</p>
      </sec>
      <sec>
        <title>Overview</title>
        <p>Our overarching approach was to compare—within state over time (by month)—measures of firearm-fatality tweet discussion volume with NVSS estimates of firearm fatalities using Pearson linear correlations. Methodologically, with respect to Twitter data, we used a multistage process as described in detail in the following subsections. We first describe our benchmark data and then describe in detail our approach to analyzing Twitter data.</p>
      </sec>
      <sec>
        <title>Benchmark Data</title>
        <p>Our benchmark data are NVSS estimates of overall firearm fatalities by state and month for 2017 and 2018. Diagnostic (International Classification of Diseases, Tenth Revision) codes in the NVSS identify mortality from accidental firearm discharges, assaults (homicides) by discharge of firearms, and intentional self-harm (suicides) by firearms. Data are collected nationwide using standardized forms and a set of common procedures to ensure comparability of data across locations.</p>
      </sec>
      <sec>
        <title>Twitter Data</title>
        <p>We developed a Twitter-based gun-related analytic platform based on content culled from the Twitter Enterprise application programming interface (API) for the 2017-2018 time period through the multistage process depicted in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The process consists of 4 stages to prepare the data for ML and 3 stages associated with ML analysis.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Construction of the Twitter-based gun-related analytic platform. API: application programming interface; ML: machine learning.</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e38319_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The API allows permitted users to access publicly available Twitter content—including tweets; tweet IDs (a unique identification number generated for each tweet); and Twitter profile information such as display name, username, user bio, and publicly stated location—under a developer agreement. The developer agreement requires that the data are used in ways consistent with people’s reasonable expectation of privacy and are not used for developing, creating, or offering commercial services in ways that violate Twitter’s policies. To identify relevant tweets, we hand curated a selected set of keywords and hashtags relating to firearms by looking at a random sample of actual tweets and using keywords identified in previous literature. The query we used to collect data from the API included &#62;200 keywords and hashtags (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The data retrospectively collected through the API include a language label for each tweet. In this study, we used tweets labeled as being written in English.</p>
        <p>The initial database we derived from the Twitter API using our curated set of firearm-related keywords and hashtags included &#62;2.3 million tweets for 2017 and 2018. More specifically, we obtained 651,466 tweets from 2017 and 1,675,083 tweets from 2018 (with the increase in the number of tweets over time reflecting larger trends in Twitter discussion on the topic). Given that billions of tweets are posted each year in English on Twitter, the discussion of firearms constitutes a relatively modest share.</p>
        <p>Next, in <italic>stage 2</italic>, because social media data are subject to the influence of robots, advertisers, and marketers, the data must be classified and filtered to exclude irrelevant data. We used a multistage process to identify and remove spam (advertising, dead links, pornography, etc). We began by detecting spam using a content-based algorithm because spam can be generated by both humans and bots. The content-based algorithm first looks for website URLs related to known advertising, phishing scheme, malware, gambling, and pornography sites. Our spam blacklist contains &#62;2 million website URLs. The second part of the spam classifier looks for content that maps to standard spam content or differs significantly from other content on the tweet stream being collected [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
        <p>In <italic>stage 3</italic>, we randomly sampled tweets from the resulting data for manual labeling—a process of assigning each tweet a set of characteristics, or features, relevant to the study question. We labeled three firearm-related features of tweets: whether the tweet pertains to (1) firearms, (2) fatality or fatalities, or (3) a mass shooting. Our analyses focused on firearm fatality (a combination of characteristics 1 and 2) and mass shooting. We also labeled tweets as an advertisement or irrelevant, spam, or noise and used these labels to further improve our spam classifier and remove identified spam tweets from further analysis.</p>
        <p>The manual labeling process relied on crowdsourced, distributed labor through Amazon Mechanical Turk (MTurk) [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>]. We recognize that varying and evolving views exist regarding the use of this platform [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>] and were attentive to these considerations in our study design, which was vetted and approved by our institutional review board. We applied best practices, creating as clear and streamlined a task as possible and training MTurk coders through a written instruction guide and with labeling examples (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) [<xref ref-type="bibr" rid="ref32">32</xref>]. Recent research confirms that MTurk can be a useful resource for quickly gathering reliable data labels for training ML models when best practices are used [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        <p>We required each tweet to be labeled by 5 different coders, and we calculated the interrater reliability of labeling across coders. The manual labeling process continued until we reached a threshold number (minimum of 400) of tweets that were labeled as positively identifying a particular characteristic. We found that at least 400 tweets for each class in our ML model was reasonable for building a reliable classifier for our learning tasks. The total number of tweets labeled for each characteristic varied because coders may label one or more characteristics for each tweet, rather than all characteristics for each tweet.</p>
        <p>As a means of assessing the manual labeling process, we calculated 2 scores for the set of tweets labeled for each characteristic. The first measured task agreement. For each tweet, we assigned the value of the characteristic being measured according to the majority vote (eg, if, of 5 labelers, 3 chose <italic>yes</italic> for firearm-related and 2 chose <italic>no</italic>, we assigned the value of <italic>yes</italic>) and then calculated the percentage of coders who agreed on this value (in this case, 3/5 = 60%). The task agreement is the average across all tweets for a given characteristic of this score. Second, we calculated a worker performance score for each coder in which the denominator was the total number of characteristics a coder labeled, and the numerator was the number of characteristics labeled for which the coder’s assigned label aligned with the majority vote. We then calculated the average worker performance score for the set of coders who labeled the set of tweets used for measuring each of the characteristics.</p>
        <p><xref ref-type="table" rid="table1">Table 1</xref> summarizes the number of tweets that were manually labeled along with task agreement and worker performance score metrics.</p>
        <p>As shown in <xref ref-type="table" rid="table1">Table 1</xref>, we found high rates of task agreement and worker performance for identifying firearm fatalities (97.14% and 97.19% for task agreement and worker performance, respectively) and mass shooting events (95.42% and 94.96% for task agreement and worker performance, respectively). We noted that 50 tweets that were labeled as being firearm-related were not labeled with a mass shooting characteristic. This occurred in our initial experiment of the labeling task. In this experiment, we labeled tweets as being about a mass shooting, homicide, or suicide. If a tweet was labeled as being about a homicide or suicide, we did not ask the labeler to determine whether the tweet was about a mass shooting. In subsequent experiments, we only focused on capturing firearm-related deaths more broadly and mass shootings explicitly to allow for count adjustments. Therefore, for subsequent experiments (we collected a few hundred labels at a time), we always asked labelers to determine whether a tweet about firearm-related fatality was discussing a mass shooting event.</p>
        <p>In <italic>stage 4</italic>, we defined <italic>reliably labeled</italic> tweets as those for which there was manual labeling agreement among ≥3 coders. We dropped from further analysis tweets that either had a reliable label of uncertain or were not reliably labeled. This means that our training data did not include ambiguous tweets and, therefore, may undercount our characteristics.</p>
        <p>The next three stages (<italic>stages 5</italic>, <italic>6</italic>, and <italic>7</italic>) of the process involved firearm-related ML. In <italic>stage 5</italic>, we divided the subset of reliably labeled tweets into training data—on which we built ML classifiers—and holdout data, which were used to validate the classifiers. We randomly selected 80% of reliably labeled tweets for the training data and 20% for the holdout. When building the ML classifiers, we used 5-fold cross-validation on the training data to measure the reliability of the classifiers. Cross-validation is a resampling procedure that allows researchers to determine whether their ML models are generalizable [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. In 5-fold cross-validation, the data set is partitioned into 5 equal subparts (or <italic>folds</italic>). Of the 5 folds, 4 (80% of the data) are used for training, and 1 (20% of the data) is used for testing. This is repeated 5 times so that each fold is part of the training set 4 times and part of the testing set 1 time, and the final accuracy of the model is determined by taking the mean accuracy of all the created models on the testing set.</p>
        <p>We began building ML classifiers to identify tweets pertaining to a firearm fatality and to a mass shooting. We minimally preprocessed the data: lowercased text, removed punctuation and URLs, and removed stopwords. We generated a number of features for the ML classifiers: frequent n-grams, words and phrases, and sentiment. The classifiers we compared were random forest, support vector machine, logistic regression, decision tree, and naïve Bayes. In <italic>stage 6</italic>, we validated the classifiers we developed for firearm fatalities and mass shootings in <italic>stage 5</italic> by further testing them on holdout data. We calculated the sensitivity and specificity of the ML model predictions against those of the manually coded firearm-fatality label.</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> summarizes the best-performing ML classifier for each classification task along with the training and holdout data set sizes and a measure of reliability based on the testing data, using our cross-validation approach, and the holdout data. The <italic>F</italic><sub>1</sub>-score is the harmonic mean of precision and recall (sensitivity) and thus considers both false positives and false negatives. For firearm-related fatality, we had 6045 labeled tweets. For mass shooting, we had 5842 labeled tweets. Because of heavy skews (imbalance) in the training data, we randomly undersampled from the labeled data of the majority label to balance the training and holdout data sets. <xref ref-type="table" rid="table2">Table 2</xref> shows the training and holdout data set sizes after this procedure.</p>
        <p>We selected random forest classifiers for both firearm fatalities and mass shooting characteristics. The <italic>F</italic><sub>1</sub>-scores, as shown in <xref ref-type="table" rid="table2">Table 2</xref>, are high and comparable for the testing and holdout data, indicating a clear ability of the classifiers to generalize beyond the training data set.</p>
        <p><italic>Stage 7</italic> completed the development of our Twitter-based gun-related analytic platform with the third and final piece of the ML analysis. In <italic>stage 7</italic>, we applied the validated classifier to identify firearm-fatality tweets.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Manually labeled tweet characteristics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="460"/>
            <col width="270"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td>Tweet label</td>
                <td>Firearm-related fatality</td>
                <td>Mass shooting</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Total number of tweets labeled (yes, no, unsure)</td>
                <td>5868 (5528, 330, 10)</td>
                <td>5478 (419, 5056, 3)</td>
              </tr>
              <tr valign="top">
                <td>Task agreement, %</td>
                <td>97.14</td>
                <td>95.42</td>
              </tr>
              <tr valign="top">
                <td>Worker performance score, %</td>
                <td>97.19</td>
                <td>94.96</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Machine learning (ML) classifier type and reliability.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="460"/>
            <col width="270"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td>Prediction task</td>
                <td>Firearm-related fatality</td>
                <td>Mass shooting</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Training data size, n</td>
                <td>1142</td>
                <td>1038</td>
              </tr>
              <tr valign="top">
                <td>Holdout data size, n</td>
                <td>286</td>
                <td>256</td>
              </tr>
              <tr valign="top">
                <td>Best ML classifier</td>
                <td>Random forest</td>
                <td>Random forest</td>
              </tr>
              <tr valign="top">
                <td><italic>F</italic><sub>1</sub>-score: cross-validation, test data, mean (SD)</td>
                <td>0.91 (0.017)</td>
                <td>0.88 (0.012)</td>
              </tr>
              <tr valign="top">
                <td><italic>F</italic><sub>1</sub>-score: holdout data</td>
                <td>0.90</td>
                <td>0.88</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Geographic Area Estimation of Twitter Firearm-Fatality Discussion Volume</title>
        <p>The NVSS classifies fatalities according to the geographic jurisdiction in which the fatality occurred. Thus, for comparison with the state-level NVSS estimates, the <italic>location of the fatality</italic> being discussed on Twitter is the location of interest (vs the location of the individual who is tweeting). We relied on the tweet content to identify the location of the fatality because location information from either profile information or tweet geocoding (which some users permit) identifies the location of the user (as opposed to location of the fatality).</p>
        <p>Importantly, location mentions in tweets primarily refer to city names. In some cases, state name is also mentioned, whereas in other cases, state can be inferred from the city name. To obtain a reasonably sufficient number of tweets per location for estimating area-level fatality discussion volume, we focused on identifying the larger cities mentioned in tweets. Specifically, we identified tweets in our sample that mentioned any of the 250 most populous cities in 2018 (based on US Census data [<xref ref-type="bibr" rid="ref35">35</xref>]). A limitation of this approach is that it focuses on fatalities in urban areas rather than in rural areas.</p>
        <p>We augmented the list of 250 city names with alternative city names commonly used on social media, such as <italic>nyc</italic>, and with city names that contain no spaces between multiple words, such as <italic>sanfrancisco</italic>. We standardized posts—converting the text to lowercase and removing URLs, user mentions (words prefixed with @), and common phrases that may look as though they are city mentions when they are not. An example of a common phrase we removed is <italic>drag queens</italic> because it may be accidentally mapped to Queens, New York City, New York. After standardization, we searched the text for city names that matched our location ontology. The majority of city names among the 250 are associated with, and can thus be reliably mapped to, a single state. For our specific set of tweets, there were no cities mentioned that mapped to multiple states.</p>
        <p>We summed tweet discussion volume across the most populous cities within a state to create a state-level measure. We constructed state-level estimates for Arizona, California, Colorado, Florida, Georgia, Illinois, Indiana, Kentucky, Louisiana, Maryland, Massachusetts, Michigan, Missouri, Nevada, New Jersey, New York, North Carolina, Ohio, Pennsylvania, Tennessee, Texas, Virginia, Washington, and Wisconsin. We excluded from further analysis those states for which the sample size of tweets was &#60;200 tweets after the mass shooting adjustments (described in the next paragraph) because they are home to only one or only a few of the more populous cities (eg, Idaho, Iowa, Nebraska, and Oregon), and the populous cities in the state are relatively small (eg, Kansas, Alabama, and Arkansas).</p>
        <p>The resulting data set, after applying the best ML classifier to identify firearm-fatality tweets and identifying the state of the fatality using location mentions, included 31,747 tweets from 2017 and 44,779 from 2018. We summarized firearm-fatality discussion volume for each state using these data. We then adjusted the state-level estimates of firearm-fatality discussion volume in 3 ways. First, mass shooting events tend to generate disproportionately high levels of discussion, that is, levels of discussion that are far higher than for other less high-profile fatalities. We accounted for the potential distorting influence of mass shooting events on the relationship between a gun fatality and tweet discussion volume by excluding tweets from the location of mass shooting events for a period of 1 week after the event. We based the 1-week exclusion period on observed trends in mass shooting discussion volume. We identified mass shooting events during the time frame of our data using information from the Gun Violence Archive [<xref ref-type="bibr" rid="ref37">37</xref>], Everytown Research [<xref ref-type="bibr" rid="ref38">38</xref>], and The Violence Project [<xref ref-type="bibr" rid="ref39">39</xref>]. Finally, we adjusted our estimates of state-level discussion volume by the percentage of the state-level population that uses Twitter [<xref ref-type="bibr" rid="ref40">40</xref>].</p>
        <p>We tested for serial correlation and found that the NVSS data contained 10 states in our final data set with some moderate serial correlation, and the Twitter data contained 5 states with moderate serial correlation. For this reason, we made each time series stationary by differencing monthly estimates [<xref ref-type="bibr" rid="ref41">41</xref>]; we refer to this as the <italic>Change</italic> result. For the level correlation, we removed states in which both time series had higher levels of serial correlation because the correlation is valid if one of the time series exhibits serial correlation and the other does not [<xref ref-type="bibr" rid="ref42">42</xref>]. This issue arose with four states: Georgia, Indiana, Michigan, and North Carolina.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Correlation Analysis</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows results from our correlation analysis. We estimated the correlation within state by month between the <italic>level</italic> of firearm-fatality discussion volume and the <italic>level</italic> of NVSS-reported fatalities, as well as the correlation within each state in the monthly <italic>change</italic> in discussion volume versus the monthly <italic>change</italic> in the NVSS fatality rate.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Results of correlation analysis<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="190"/>
            <col width="480"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Discussion volume</td>
                <td>Discussion volume adjusted for average state-level Twitter use</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>LEVEL: Correlation, range</td>
                <td>–0.293 to 0.535</td>
                <td>–0.289 to 0.537</td>
              </tr>
              <tr valign="top">
                <td>LEVEL: Correlation, mean; median</td>
                <td>0.085; 0.091</td>
                <td>0.087; 0.093</td>
              </tr>
              <tr valign="top">
                <td>CHANGE: Correlation, range</td>
                <td>–0.057 to 0.682</td>
                <td>–0.059 to 0.688</td>
              </tr>
              <tr valign="top">
                <td>CHANGE: Correlation, mean; median</td>
                <td>0.313; 0.303</td>
                <td>0.312; 0.301</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Pearson linear correlations are reported.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The correlation between the monthly level of firearm-fatality tweets and the monthly number of fatalities measured by the NVSS is weak (median 0.081) and widely dispersed across states (range –0.31 to 0.54). The correlation between month-to-month changes in firearm fatalities discussed in tweets versus those estimated in the NVSS is moderate (median 0.30) and exhibited less dispersion among states than the monthly level correlations (range –0.057 to 0.68). For the correlation among month-to-month changes in firearm fatalities, almost half (11/24, 46%) of the states have correlations ranging from 0.1 to 0.4. More than a quarter (7/24, 29%) of the states have correlations below this range, and a quarter (6/24, 25%) have correlations above this range. The results for the adjusted discussion volume (last column of <xref ref-type="table" rid="table3">Table 3</xref>, discussion volume adjusted for Twitter use in the state) are very similar to the unadjusted results, with negligible differences observed in estimated correlation rates.</p>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> provides additional details for the correlation in monthly changes in fatality discussion volume and NVSS-estimated fatalities, with a depiction of state-by-state (adjusted) correlation rates for 2017. White-shaded states have no correlation. The darker the purple shade of a state, the higher the correlation. The gray-shaded states are those for which we were not able to estimate a Twitter fatality discussion rate (refer to the Twitter Data subsection under Methods). Not unexpectedly, the strength of the correlation seems to be related to the percentage of the state’s population living in one of the most populous cities that we use in our location ontology; for example, one-third or more of the state population in Texas, New York, and Arizona resides in one of the top 100 most populated cities in the state (34%, 44%, and 51%, respectively). These states exhibit some of the highest correlation rates between monthly fluctuation in firearm-fatality discussion volume and NVSS-based fatality estimates. Likewise, Georgia, Michigan, and Maryland are among the states with both the lowest percentages of their population living in more populous cities (5%, 7%, and 10%, respectively) and some of the lowest rates of correlation among the states studied.</p>
        <p>By contrast, 47% of Nevada residents live in one of the most populous cities, but the correlation rate in Nevada falls into a lower tier than the correlation rates in Texas, New York, and Arizona. Although we adjusted for mass shooting discussion volume by removing tweets from the week after such an event, the lower correlation observed in Nevada suggests that the adjustment may have been insufficient for capturing the extent of discussion volume distortion in the wake of the mass shooting event in Las Vegas, given the magnitude of the event. Analyzing the data in more detail shows that discussion of this shooting returns at anniversaries (1 year) and when other larger mass shootings occur in other parts of the country, identifying a need for a more extensive adjustment for historically large mass shootings.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Correlation by state between change in firearm–fatality tweets and change in National Vital Statistics System–estimated firearm fatalities in 2017.</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e38319_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Comparison of Correlations</title>
        <p>Furthermore, a comparison of correlations for each state in 2017 versus 2018 shows that states with the largest cities tend to have the most stable correlations; for example, Texas, New York, California, Florida, and Ohio; whereas states with fewer large cities and fewer tweets tend to have higher variation in their correlation estimates; for example, Missouri, Tennessee, South Carolina, and Maryland. An additional factor that is likely to affect the correlation rate is the location within the state of firearm fatalities. To the extent that fatalities within a state are more concentrated in the most populated cities, the correlation between NVSS-estimated fatalities and Twitter discussion volume is expected to be higher.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Among the subset of states studied, we found weak-to-moderate correlation between our measure of the level of firearm-fatality tweets and the NVSS-based estimates of the level of firearm fatalities and higher moderate correlation in measures of the month-to-month changes in firearm-fatality tweets and estimated fatalities. As our ontology for Twitter location mentions relies on identification of the 250 most populous cities, our correlation is higher in states in which more of the state’s population was living in one of these cities. We further expected the correlation to be higher in areas where firearm fatalities were concentrated in the most populated cities and found suggestive evidence regarding this point.</p>
        <p>A key limitation of this analysis is that we relied on tweets from more populated cities to develop a state-level estimate of discussion volume. Our approach reflected, dually, the limited availability of firearm-fatality data at the city level and the limited availability of location identifiers for tweets. An important feature of this analysis was the need to identify the location of the event being discussed versus the location of the user. In the case of the latter, geocoding of the user profile is advantageous and can provide a state-level identifier, but the former relies only on location mentions within the tweet.</p>
        <p>Even with these limitations, the correlation capturing fluctuation in firearm mortality is moderate. We view this as a promising signal for the potential of social media data to provide meaningful information on gun-related outcomes in the future. More specifically, our findings suggest that Twitter data may hold particular value for tracking dynamics in gun-related outcomes. In addition, for location-specific firearm-related outcomes, the data are most valuable for understanding dynamics in relatively populous cities that are identifiable through location mentions in tweet content. Finally, the data are likely to be particularly valuable for understanding firearm outcomes not currently measured, not measured well, or not measurable through other available means. A key advantage of Twitter data is the continuous, automated, and near–real-time monitoring they provide [<xref ref-type="bibr" rid="ref13">13</xref>]. Once big data infrastructure has been invested in, the data can be relatively easily processed. The initial cost of big data infrastructure can be high if researchers want to stream data for large periods of time. However, for a single study, researchers who can access a server should be able to conduct the analysis at a low cost. Because of this potentially higher investment, we have developed a text analytic portal that allows researchers to construct variables from our social media data [<xref ref-type="bibr" rid="ref43">43</xref>], thereby enabling future research with these data without the cost of setting up big data infrastructure.</p>
        <p>We recognize the need for additional analyses to continue to adapt and extend upon the approach developed and applied in this research, including, for example, work that assesses the reliability of associations over longer time periods. We also note that, unlike survey data that are sampled to be representative of the underlying population, social media data emanate from those who use a particular platform. Although the use of Twitter in the United States is significant (in 2021, nearly a quarter of adults reported using Twitter, and among those who reported using the platform, nearly half said that they use it once a day or more) [<xref ref-type="bibr" rid="ref44">44</xref>], it is nonetheless also true that rates of social media use are correlated with age and to some extent with other demographic characteristics [<xref ref-type="bibr" rid="ref44">44</xref>]. Much of the existing analytic work with social media data does not directly deal with this issue. In our approach, we adjusted our estimates for the percentage of Twitter users in each state. Additional statistical adjustments that more completely account for engagement with the platform are important for future work. Furthermore, social media data include limited sociodemographic information about users. Additional methodological strides toward developing robust methods for demographic imputations represent an important dimension of future efforts.</p>
      </sec>
      <sec>
        <title>Usefulness of Social Media Data</title>
        <p>The Centers for Disease Control and Prevention [<xref ref-type="bibr" rid="ref45">45</xref>] describes its public health approach to prevention of violence, including firearm violence, as encompassing four steps: defining and monitoring the problem, identifying risk and protective factors, developing and testing prevention strategies, and assuring widespread adoption [<xref ref-type="bibr" rid="ref46">46</xref>]. For firearm violence, the first step—building a foundation of information for describing the epidemiology of such violence—requires focused resources and development. In addition to recent developments in survey, administrative, and other data, such as the important efforts by news media and other organizations to track gun violence incidents in significant detail and the advent of data scraping from obituaries [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>], social media data are a promising future source. This research provides an important building block for future work that continues to develop the usefulness of social media data, alone or in conjunction with other data resources, to strengthen the information base on which firearm research relies, and, more generally, contributes to the process of integrating emerging big data algorithms and traditional data sources for behavioral understanding, decision support, and evidence-based public policy.</p>
        <p>As we build out the power of social media data for informing public health problems such as firearm violence, several important dimensions need to be kept in mind. The role that social media may play in exacerbating gun violence or spreading trauma related to gun violence cannot be ignored. However, these data can also be used to help target and improve our understanding of those who use guns and allow for new approaches to gun violence–prevention interventions [<xref ref-type="bibr" rid="ref49">49</xref>]. To use these data to improve public health outcomes and our understanding of human beliefs and behaviors, we must spearhead establishing best practices for using social media data in ethical ways [<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref52">52</xref>], as well as understanding representativeness, methodological limitations, and algorithmic biases.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Hashtags, keywords, and phrases used to collect data from the Twitter application programming interface.</p>
        <media xlink:href="jmir_v24i8e38319_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 32 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Instructions provided to Amazon Mechanical Turk coders.</p>
        <media xlink:href="jmir_v24i8e38319_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 39 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">MDI</term>
          <def>
            <p>Massive Data Institute</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">MTurk</term>
          <def>
            <p>Amazon Mechanical Turk</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NVSS</term>
          <def>
            <p>National Vital Statistics System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This project was conducted at the Massive Data Institute (MDI) at Georgetown University, thanks to funding by the National Collaborative on Gun Violence Research and the MDI. The authors also thank Rebecca Vanarsdall and the MDI technical team for their support.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>LS and CRG conceptualized the project, led the team, and wrote the paper. YW and SH conducted the main analysis.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Fast Facts: Firearm Violence Prevention</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2021</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/violenceprevention/firearms/fastfact.html">https://www.cdc.gov/violenceprevention/firearms/fastfact.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Gun violence is a public health crisis [Fact sheet]</article-title>
          <source>American Public Health Association (APHA)</source>
          <year>2021</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.apha.org/-/media/Files/PDF/factsheets/200221_Gun_Violence_Fact_Sheet.ashx">https://www.apha.org/-/media/Files/PDF/factsheets/200221_Gun_Violence_Fact_Sheet.ashx</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wintemute</surname>
              <given-names>GJ</given-names>
            </name>
          </person-group>
          <article-title>The epidemiology of firearm violence in the twenty-first century United States</article-title>
          <source>Annu Rev Public Health</source>
          <year>2015</year>
          <month>03</month>
          <day>18</day>
          <volume>36</volume>
          <fpage>5</fpage>
          <lpage>19</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-031914-122535</pub-id>
          <pub-id pub-id-type="medline">25533263</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morral</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Ramchand</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Smart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gresenz</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Cherney</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nicosia</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Holliday</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Sayers</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Schell</surname>
              <given-names>TL</given-names>
            </name>
          </person-group>
          <source>The Science of Gun Policy: A Critical Synthesis of Research Evidence on the Effects of Gun Policies in the United States</source>
          <year>2018</year>
          <publisher-loc>Santa Monica, CA, USA</publisher-loc>
          <publisher-name>RAND Corporation</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Morral</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Smucker</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cherney</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schell</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahluwalia</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Cefalu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xenakis</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ramchand</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>The Science of Gun Policy: A Critical Synthesis of Research Evidence on the Effects of Gun Policies in the United States. 2nd Edition</source>
          <year>2020</year>
          <publisher-loc>Santa Monica, CA, USA</publisher-loc>
          <publisher-name>RAND Corporation</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <article-title>The State of Firearms Data in 2019: First Report of the Expert Panel on Firearms Data Infrastructure</article-title>
          <source>NORC at the University of Chicago</source>
          <year>2020</year>
          <month>1</month>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.norc.org/PDFs/Firearm%20Data%20Infrastructure%20Expert%20Panel/State%20of%20Firearms%20Research%202019.pdf">https://www.norc.org/PDFs/Firearm%20Data%20Infrastructure%20Expert%20Panel/State%20of%20Firearms%20Research%202019.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stark</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Funding and publication of research on gun violence and other leading causes of death</article-title>
          <source>JAMA</source>
          <year>2017</year>
          <month>01</month>
          <day>03</day>
          <volume>317</volume>
          <issue>1</issue>
          <fpage>84</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2016.16215</pub-id>
          <pub-id pub-id-type="medline">28030692</pub-id>
          <pub-id pub-id-type="pii">2595514</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mayer-Schönberger</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Cukier</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <source>Big Data: A Revolution that will Transform How We Live, Work, and Think</source>
          <year>2013</year>
          <publisher-loc>Boston, MA, USA</publisher-loc>
          <publisher-name>Houghton Mifflin Harcourt</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>How social media will change public health</article-title>
          <source>IEEE Intell Syst</source>
          <year>2012</year>
          <month>07</month>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>81</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1109/mis.2012.76</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stieglitz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dang-Xuan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bruns</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Neuberger</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Social media analytics</article-title>
          <source>Bus Inf Syst Eng</source>
          <year>2014</year>
          <month>2</month>
          <day>15</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>89</fpage>
          <lpage>96</lpage>
          <pub-id pub-id-type="doi">10.1007/s12599-014-0315-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gamon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Counts</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Horvitz</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Predicting depression via social media</article-title>
          <source>Proc Int AAAI Conf Web Soc Media</source>
          <year>2013</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>128</fpage>
          <lpage>37</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Maitland</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Data analytics and displacement: using big data to forecast mass movement of people</article-title>
          <source>Digital Lifeline?: ICTs for Refugees and Displaced Persons</source>
          <year>2018</year>
          <publisher-loc>Cambridge, MA, USA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
          <fpage>185</fpage>
          <lpage>206</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Traugott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bode</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Budak</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Davis-Kean</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Guha</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ladd</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mneimneh</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Pasek</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Raghunathan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Soroka</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wahedi</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Data Blending: Haven’t We Been Doing This for Years?</article-title>
          <source>Massive Data Institute, Georgetown University</source>
          <year>2020</year>
          <month>4</month>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mccourt.georgetown.edu/wp-content/uploads/2022/02/MDI-Data-Blending-White-Paper-April2020_ACCESSIBLE.pdf">https://mccourt.georgetown.edu/wp-content/uploads/2022/02/MDI-Data-Blending-White-Paper-April2020_ACCESSIBLE.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Big data. The parable of Google Flu: traps in big data analysis</article-title>
          <source>Science</source>
          <year>2014</year>
          <month>03</month>
          <day>14</day>
          <volume>343</volume>
          <issue>6176</issue>
          <fpage>1203</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1126/science.1248506</pub-id>
          <pub-id pub-id-type="medline">24626916</pub-id>
          <pub-id pub-id-type="pii">343/6176/1203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ladd</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bode</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Budak</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Conrad</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Cooksey</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Davis-Kean</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dworak-Fisher</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Freelon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hopkins</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kelley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mneimneh</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Pasek</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Raghunathan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gresenz</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Soroka</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Traugott</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Measurement considerations for quantitative social science research using social media data</article-title>
          <source>PsyArXiv</source>
          <year>2020</year>
          <month>12</month>
          <pub-id pub-id-type="doi">10.31234/osf.io/ga6nc</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vayena</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Salathé</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Madoff</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Ethical challenges of big data in public health</article-title>
          <source>PLoS Comput Biol</source>
          <year>2015</year>
          <month>02</month>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>e1003904</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pcbi.1003904"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1003904</pub-id>
          <pub-id pub-id-type="medline">25664461</pub-id>
          <pub-id pub-id-type="pii">PCOMPBIOL-D-14-00778</pub-id>
          <pub-id pub-id-type="pmcid">PMC4321985</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Varghese</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Donnelly</surname>
              <given-names>PD</given-names>
            </name>
          </person-group>
          <article-title>A machine learning analysis of Twitter sentiment to the Sandy Hook shootings</article-title>
          <source>Proceedings of the IEEE 12th International Conference on e-Science</source>
          <year>2016</year>
          <conf-name>e-Science '16</conf-name>
          <conf-date>October 23-27, 2016</conf-date>
          <conf-loc>Baltimore, MD, USA</conf-loc>
          <fpage>303</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1109/escience.2016.7870913</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Starbird</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Examining the alternative media ecosystem through the production of alternative narratives of mass shooting events on Twitter</article-title>
          <source>Proc Int AAAI Conf Web Soc Media</source>
          <year>2017</year>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>230</fpage>
          <lpage>9</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>YR</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>WT</given-names>
            </name>
          </person-group>
          <article-title>The dynamics of Twitter users’ gun narratives across major mass shooting events</article-title>
          <source>Humanit Soc Sci Commun</source>
          <year>2020</year>
          <month>08</month>
          <day>03</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>46</fpage>
          <pub-id pub-id-type="doi">10.1057/s41599-020-00533-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blankenship</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Graham</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Assessing the social and emotional costs of mass shootings with Twitter data</article-title>
          <source>Brookings</source>
          <year>2021</year>
          <month>5</month>
          <day>5</day>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.brookings.edu/blog/up-front/2021/05/05/assessing-the-social-and-emotional-costs-of-mass-shootings-with-twitter-data/">https://www.brookings.edu/blog/up-front/2021/05/05/assessing-the-social-and-emotional-costs-of-mass-shootings-with-twitter-data/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kalin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dabrowski</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Characteristics of gun advertisements on social media: systematic search and content analysis of Twitter and YouTube posts</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>03</month>
          <day>27</day>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>e15736</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/3/e15736/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/15736</pub-id>
          <pub-id pub-id-type="medline">32217496</pub-id>
          <pub-id pub-id-type="pii">v22i3e15736</pub-id>
          <pub-id pub-id-type="pmcid">PMC7148552</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wasike</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Persuasion in 140 characters: testing issue framing, persuasion and credibility via Twitter and online news articles in the gun control debate</article-title>
          <source>Comput Human Behav</source>
          <year>2017</year>
          <month>01</month>
          <volume>66</volume>
          <fpage>179</fpage>
          <lpage>90</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2016.09.037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hancock</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ayers</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>After Sandy Hook Elementary: a year in the gun control debate on Twitter</article-title>
          <source>arXiv</source>
          <year>2016</year>
          <month>10</month>
          <day>6</day>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <article-title>Leading countries based on number of Twitter users as of October 2021</article-title>
          <source>Statista</source>
          <year>2022</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.statista.com/statistics/242606/number-of-active-twitter-users-in-selected-countries/">https://www.statista.com/statistics/242606/number-of-active-twitter-users-in-selected-countries/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kawintiranon</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Budak</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Traditional and context-specific spam detection in low resource settings</article-title>
          <source>Mach Learn</source>
          <year>2022</year>
          <month>06</month>
          <day>09</day>
          <volume>111</volume>
          <issue>7</issue>
          <fpage>2515</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1007/s10994-022-06176-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Litman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Conducting Online Research on Amazon Mechanical Turk and Beyond</source>
          <year>2020</year>
          <publisher-loc>Thousand Oaks, CA, USA</publisher-loc>
          <publisher-name>Sage Publications</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schnoebelen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kuperman</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Using Amazon Mechanical Turk for linguistic research</article-title>
          <source>Psihologija</source>
          <year>2010</year>
          <volume>43</volume>
          <issue>4</issue>
          <fpage>441</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.2298/psi1004441s</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kees</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burton</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sheehan</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>An analysis of data quality: professional panels, student subject pools, and Amazon's Mechanical Turk</article-title>
          <source>J Advert</source>
          <year>2017</year>
          <month>01</month>
          <day>23</day>
          <volume>46</volume>
          <issue>1</issue>
          <fpage>141</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1080/00913367.2016.1269304</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Semuels</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The Internet Is Enabling a New Kind of Poorly Paid Hell</article-title>
          <source>The Atlantic</source>
          <year>2018</year>
          <month>1</month>
          <day>23</day>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.theatlantic.com/business/archive/2018/01/amazon-mechanical-turk/551192/">https://www.theatlantic.com/business/archive/2018/01/amazon-mechanical-turk/551192/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Newman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>I Found Work on an Amazon Website. I Made 97 Cents an Hour</article-title>
          <source>The New York Times</source>
          <year>2019</year>
          <month>11</month>
          <day>15</day>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nytimes.com/interactive/2019/11/15/nyregion/amazon-mechanical-turk.html">https://www.nytimes.com/interactive/2019/11/15/nyregion/amazon-mechanical-turk.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moss</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenzweig</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jaffe</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Litman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Is it ethical to use Mechanical Turk for behavioral research? Relevant data from a representative survey of MTurk participants and wages</article-title>
          <source>PsyArXiv</source>
          <year>2020</year>
          <month>4</month>
          <day>28</day>
          <pub-id pub-id-type="doi">10.31234/osf.io/jbc9d</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Amazon Mechanical Turk</collab>
          </person-group>
          <article-title>Requester best practices guide</article-title>
          <source>Amazon Web Services</source>
          <year>2011</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mturkpublic.s3.amazonaws.com/docs/MTURK_BP.pdf">https://mturkpublic.s3.amazonaws.com/docs/MTURK_BP.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Vanarsdall</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gresenz</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Students or Mechanical Turk: who are the more reliable social media data labelers?</article-title>
          <source>Proceedings of the 11th International Conference on Data Science, Technology and Applications</source>
          <year>2022</year>
          <conf-name>DATA '22</conf-name>
          <conf-date>July 11-13, 2022</conf-date>
          <conf-loc>Lisbon, Portugal</conf-loc>
          <fpage>408</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.5220/0011278600003269</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <source>The Elements of Statistical Learning: Data Mining, Inference, and Prediction. 2nd edition</source>
          <year>2009</year>
          <publisher-loc>New York, NY, USA</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <article-title>Metropolitan and micropolitan statistical areas population totals and components of change: 2010-2019</article-title>
          <source>United States Census Bureau</source>
          <year>2021</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.census.gov/data/datasets/time-series/demo/popest/2010s-total-metro-and-micro-statistical-areas.html">https://www.census.gov/data/datasets/time-series/demo/popest/2010s-total-metro-and-micro-statistical-areas.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kohavi</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A study of cross-validation and bootstrap for accuracy estimation and model selection</article-title>
          <source>Proceedings of the 14th International Joint Conference on Artificial Intelligence - Volume 2</source>
          <year>1995</year>
          <conf-name>IJCAI '95</conf-name>
          <conf-date>August 20-25, 1995</conf-date>
          <conf-loc>Montreal, Canada</conf-loc>
          <fpage>1137</fpage>
          <lpage>43</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>Gun violence archive Database</article-title>
          <source>Gun Violence Archive (GVA)</source>
          <year>2022</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gunviolencearchive.org/about">https://www.gunviolencearchive.org/about</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <article-title>Mass Shootings in America</article-title>
          <source>Everytown Research</source>
          <year>2021</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://everytownresearch.org/maps/mass-shootings-in-america/">https://everytownresearch.org/maps/mass-shootings-in-america/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Densley</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>The violence project database of mass shootings in the United States, 1966-2019</article-title>
          <source>The Violence Project</source>
          <year>2019</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.theviolenceproject.org">https://www.theviolenceproject.org</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McKinstry</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McPhee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Raghunathan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Traugott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Turakhia</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wycoff</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Understanding Who Uses Twitter: State level estimates of those on Twitter. MOSAIC Methods Brief</article-title>
          <source>Measuring Online Social Attitudes and Information Collaborative, University of Michigan</source>
          <year>2021</year>
          <month>11</month>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mosaic.mdi.georgetown.edu/wp-content/uploads/2021/11/MOSAIC-Methods-Brief-State-Level-Twitter-Use_11092021.pdf">https://mosaic.mdi.georgetown.edu/wp-content/uploads/2021/11/MOSAIC-Methods-Brief-State-Level-Twitter-Use_11092021.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sims</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>Bayesian skepticism on unit root econometrics</article-title>
          <source>J Econ Dyn Control</source>
          <year>1988</year>
          <month>6</month>
          <volume>12</volume>
          <issue>2-3</issue>
          <fpage>463</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="doi">10.1016/0165-1889(88)90050-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Dunsmuir</surname>
              <given-names>WT</given-names>
            </name>
          </person-group>
          <article-title>Dangers and uses of cross-correlation in analyzing time series in perception, performance, movement, and neuroscience: the importance of constructing transfer function autoregressive models</article-title>
          <source>Behav Res Methods</source>
          <year>2016</year>
          <month>06</month>
          <volume>48</volume>
          <issue>2</issue>
          <fpage>783</fpage>
          <lpage>802</lpage>
          <pub-id pub-id-type="doi">10.3758/s13428-015-0611-2</pub-id>
          <pub-id pub-id-type="medline">26100765</pub-id>
          <pub-id pub-id-type="pii">10.3758/s13428-015-0611-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Padden</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Davis-Kean</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>David</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Marwadi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Vanarsdall</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Text analytic research portals: supporting large-scale social science research</article-title>
          <source>Proceedings of the 2021 IEEE International Conference on Big Data</source>
          <year>2021</year>
          <conf-name>BigData '21</conf-name>
          <conf-date>December 15–18, 2021</conf-date>
          <conf-loc>Orlando, FL, USA</conf-loc>
          <fpage>6020</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.1109/BigData52589.2021.9671696</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <article-title>Social media fact sheet</article-title>
          <source>Pew Research Center</source>
          <year>2021</year>
          <month>4</month>
          <day>7</day>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/fact-sheet/social-media/">https://www.pewresearch.org/internet/fact-sheet/social-media/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <article-title>The public health approach to violence prevention</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2022</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/violenceprevention/about/publichealthapproach.html">https://www.cdc.gov/violenceprevention/about/publichealthapproach.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dahlberg</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Krug</surname>
              <given-names>EG</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Krug</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Dahlberg</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Mercy</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Zwi</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Lozano</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Violence: a global public health problem</article-title>
          <source>World Report on Violence and Health</source>
          <year>2002</year>
          <publisher-loc>Geneva, Switzerland</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
          <fpage>1</fpage>
          <lpage>21</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <article-title>Fatal Force</article-title>
          <source>The Washington Post</source>
          <year>2022</year>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.washingtonpost.com/graphics/investigations/police-shootings-database/">https://www.washingtonpost.com/graphics/investigations/police-shootings-database/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schnell</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Redlich</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Web scraping online newspaper death notices for the estimation of the local number of deaths</article-title>
          <source>Proceedings of the 12th International Joint Conference on Biomedical Engineering Systems and Technologies</source>
          <year>2019</year>
          <conf-name>BIOSTEC '19</conf-name>
          <conf-date>February 22–24, 2019</conf-date>
          <conf-loc>Prague, Czech Republic</conf-loc>
          <fpage>319</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.5220/0007382603190325</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patton</surname>
              <given-names>DU</given-names>
            </name>
            <name name-style="western">
              <surname>McGregor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Slutkin</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Youth gun violence prevention in a digital age</article-title>
          <source>Pediatrics</source>
          <year>2018</year>
          <month>04</month>
          <volume>141</volume>
          <issue>4</issue>
          <fpage>e20172438</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29496903"/>
          </comment>
          <pub-id pub-id-type="doi">10.1542/peds.2017-2438</pub-id>
          <pub-id pub-id-type="medline">29496903</pub-id>
          <pub-id pub-id-type="pii">peds.2017-2438</pub-id>
          <pub-id pub-id-type="pmcid">PMC8892939</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polyzou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Farr</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gresenz</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Social Media Data - Our Ethical Conundrum. A Quarterly bulletin of the IEEE Computer Society Technical Committee on Database Engineering</article-title>
          <source>National Science Foundation</source>
          <year>2020</year>
          <month>12</month>
          <access-date>2022-06-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://sites.computer.org/debull/A20dec/p23.pdf">http://sites.computer.org/debull/A20dec/p23.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bowen</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Using classic social media cases to distill ethical guidelines for digital engagement</article-title>
          <source>J Mass Media Ethics</source>
          <year>2013</year>
          <month>04</month>
          <volume>28</volume>
          <issue>2</issue>
          <fpage>119</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1080/08900523.2013.793523</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leonelli</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lovell</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wheeler</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Fleming</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>From FAIR data to fair data use: methodological data fairness in health-related social media research</article-title>
          <source>Big Data Soc</source>
          <year>2021</year>
          <month>05</month>
          <day>03</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>20539517211010310</fpage>
          <pub-id pub-id-type="doi">10.1177/20539517211010310</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
