<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i6e17196</article-id>
      <article-id pub-id-type="pmid">32579119</article-id>
      <article-id pub-id-type="doi">10.2196/17196</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Association Between HIV-Related Tweets and HIV Incidence in the United States: Infodemiology Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lortz</surname>
            <given-names>Julia</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Black</surname>
            <given-names>Joshua</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Stevens</surname>
            <given-names>Robin</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Family and Community Health</institution>
            <institution>University of Pennsylvania School of Nursing</institution>
            <addr-line>416 Curie Boulevard</addr-line>
            <addr-line>Philadelphia, PA, 19104</addr-line>
            <country>United States</country>
            <phone>1 2158984063</phone>
            <email>robin.stevens@usc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0481-9983</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Bonett</surname>
            <given-names>Stephen</given-names>
          </name>
          <degrees>BSN</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7608-3741</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Bannon</surname>
            <given-names>Jacqueline</given-names>
          </name>
          <degrees>RN, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7800-0321</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Chittamuru</surname>
            <given-names>Deepti</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2411-9658</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Slaff</surname>
            <given-names>Barry</given-names>
          </name>
          <degrees>MSE, MCIT</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1539-2591</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Browne</surname>
            <given-names>Safa K</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5528-2805</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Sarah</given-names>
          </name>
          <degrees>BSN</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3657-9934</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Bauermeister</surname>
            <given-names>José A</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9276-2306</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Family and Community Health</institution>
        <institution>University of Pennsylvania School of Nursing</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>University of California Merced</institution>
        <addr-line>Merced, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Children's Hospital of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Robin Stevens <email>robin.stevens@usc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>24</day>
        <month>6</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>6</issue>
      <elocation-id>e17196</elocation-id>
      <history>
        <date date-type="received">
          <day>25</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>10</day>
          <month>1</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>5</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>22</day>
          <month>3</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Robin Stevens, Stephen Bonett, Jacqueline Bannon, Deepti Chittamuru, Barry Slaff, Safa K Browne, Sarah Huang, José A Bauermeister. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 24.06.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2020/6/e17196" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Adolescents and young adults in the age range of 13-24 years are at the highest risk of developing HIV infections. As social media platforms are extremely popular among youths, researchers can utilize these platforms to curb the HIV epidemic by investigating the associations between the discourses on HIV infections and the epidemiological data of HIV infections.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The goal of this study was to examine how Twitter activity among young men is related to the incidence of HIV infection in the population.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used integrated human-computer techniques to characterize the HIV-related tweets by male adolescents and young male adults (age range: 13-24 years). We identified tweets related to HIV risk and prevention by using natural language processing (NLP). Our NLP algorithm identified 89.1% (2243/2517) relevant tweets, which were manually coded by expert coders. We coded 1577 HIV-prevention tweets and 17.5% (940/5372) of general sex-related tweets (including emojis, gifs, and images), and we achieved reliability with intraclass correlation at 0.80 or higher on key constructs. Bivariate and multivariate analyses were performed to identify the spatial patterns in posting HIV-related tweets as well as the relationships between the tweets and local HIV infection rates.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We analyzed 2517 tweets that were identified as relevant to HIV risk and prevention tags; these tweets were geolocated in 109 counties throughout the United States. After adjusting for region, HIV prevalence, and social disadvantage index, our findings indicated that every 100-tweet increase in HIV-specific tweets per capita from noninstitutional accounts was associated with a multiplicative effect of 0.97 (95% CI [0.94-1.00]; <italic>P</italic>=.04) on the incidence of HIV infections in the following year in a given county.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Twitter may serve as a proxy of public behavior related to HIV infections, and the association between the number of HIV-related tweets and HIV infection rates further supports the use of social media for HIV disease prevention.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>HIV/AIDS</kwd>
        <kwd>social media</kwd>
        <kwd>youth</kwd>
        <kwd>natural language processing</kwd>
        <kwd>surveillance</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The highest burden of new HIV infections has been reported in adolescents and young adults between the ages of 13 and 24 years, with 37.1% of the new HIV infections occurring in this age group in the United States [<xref ref-type="bibr" rid="ref1">1</xref>]. Among the youths in this age group, 87% of the individuals diagnosed with HIV infection were reported to be young men, and 51% of these young men were identified as African American, while 25% of these young men were identified as Hispanic/Latino [<xref ref-type="bibr" rid="ref1">1</xref>]. With the rapid increase in the usage of social media over the last 15 years, Twitter has emerged as a popular social networking platform. Studies have shown that Twitter is used by 32% of the adolescents and 44% of the young adults, with Black youths reporting higher levels of use than their white and Latino peers [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Since Twitter is used to discuss health-related and risk-related topics [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>], this platform offers a distinct opportunity to investigate the attitudes, beliefs, and behaviors of the youths via their publicly shared posts that they have created or to which they have responded. This unique content may provide additional insights into the sentiments and discourses of youths [<xref ref-type="bibr" rid="ref7">7</xref>] beyond what can be identified in traditional formative research methods, particularly at the national level. Analysis of Twitter, for example, might offer insight into the HIV-related beliefs and attitudes of youths of different races/ethnicities and help inform interventions that are designed to curb the HIV epidemic among youths.</p>
      <p>The popularity of Twitter and the high volume of public tweets provide unprecedented access to discourses about sexual health and HIV by youths across a country. Although youths use social media platforms such as Twitter to share and seek sexual health information and to communicate with romantic and sexual partners [<xref ref-type="bibr" rid="ref8">8</xref>], research on tweets related to alcohol, marijuana, cancer, and vaccines has shown that Twitter is also used to promote risky behaviors, spread misinformation, and reinforce HIV- and sexually transmitted infection (STI)-related stigmas [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Several studies have also considered social media messages as surveillance data to monitor the incidence of influenza, depression, Zika virus infections, and substance use [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Similar techniques have been used to assess the associations between social media messages on sex and HIV and the risk behavior and HIV incidence [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      <p>Several studies have shown evidence of a correlation between HIV-related tweets and HIV prevalence in a population [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Two studies [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>] showed that future-oriented and action-based tweets regarding HIV were associated with decreased incidence of HIV infections at the county level in the United States. In contrast, Young et al [<xref ref-type="bibr" rid="ref8">8</xref>] found that there was a statistically significant positive association between HIV-related tweets and HIV prevalence. However, these studies [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>] did not distinguish between the source of the tweet; instead, they combined tweets from individual users and institutions such as public health agencies in their analyses. Importantly, studies on HIV and social media focus on certain keywords such as “sex,” “HIV testing,” and “discrimination.” Although the use of these keywords is useful for examining the associations between HIV-related tweets and HIV prevalence, studies often have reduced sensitivity to retrieve relevant tweets for analysis and intervention [<xref ref-type="bibr" rid="ref16">16</xref>]. Since the abovementioned studies have provided promising evidence that tweets may be associated with HIV risk, there is a need for in-depth contextualized analysis of Twitter messages on risky sexual behavior and health, including analyses of message source variations. Therefore, the goal of our study was to explore how Twitter activity is related to HIV incidence and whether message characteristics such as content and source can reveal the incidence of HIV infection in a population and the future risks associated with HIV. 
Twitter messages may serve as a signal of the real-time dynamics in HIV epidemiology. In this study, we combined in-depth content analysis of HIV-related tweets with automated machine learning techniques to analyze the county-level associations between HIV-related tweets and new HIV infections in the United States.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Sample</title>
        <p>Using the Twitter “fire hose” application programming interface, which provides broad access to public Twitter data, we drew a random sample of 1% of publicly available tweets posted between January 1, 2016 and December 31, 2016. We sampled tweets from users who tweeted at least 500 words in 2016 and who were geolocated in a county in the United States. To determine the geolocation, we used two types of data: tweet-specific latitude/longitude coordinates and the self-reported location information in Twitter user profiles. The distribution of the geolocated tweets by county approximates the US population density [<xref ref-type="bibr" rid="ref17">17</xref>]. Duplicate tweets, bots, and non-English tweets were removed [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. After we produced age and gender affiliation estimates for each user with HIV-related tweets, based on our tested algorithms [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>], we limited our sample to users with predicted age (range, 13-24 years) and predicted gender (males only). Using previous literature [<xref ref-type="bibr" rid="ref22">22</xref>] and input from our young researchers, we developed a keyword list of HIV-related terms such as HIV, AIDS, HIV testing, condoms, multiple sexual partners, STI, risky sexual behavior, and pre-exposure prophylaxis (PrEP). PrEP is an effective HIV prevention medication taken prior to exposure to the virus. This keyword list was used to identify relevant tweets, and we extracted 9707 HIV-related tweets from 6439 users from the age/gender stratified sample. We also removed pornographic tweets by developing a classifier to identify pornography and excluded those tweets from our data set. Our final data set included 6949 tweets by 1541 young male adults and male adolescents in the United States, and these tweets contained at least one relevant keyword. 
<xref rid="figure1" ref-type="fig">Figure 1</xref> shows the number of messages and users retained at each step of the above described process.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Twitter sample retrieval flowchart. API: application programming interface.</p>
          </caption>
          <graphic xlink:href="jmir_v22i6e17196_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Content Analysis</title>
        <p>Data analysis was conducted with a sample of HIV-related tweets posted on Twitter. We manually coded a sample of HIV-related tweets by oversampling tweets with HIV-specific keywords. To accomplish this, we grouped our keywords into 2 broad categories: HIV prevention-specific tweets (n=1577) and general sex-related tweets (n=5372). We initially included a third category, namely, risk-related, which included risk behavior-promoting (n=6) tweets. However, we excluded this category from our analysis owing to the small number of tweets in this category. From the final data set, we took the full sample of 1577 prevention-related and a 17.5% (940/5372) random sample of general sex-related tweets, yielding 940 general sex-related tweets for manual content analysis.</p>
        <p>The final data set (2517 tweets by 596 users) was coded by 4 expert coders for 19 nonexclusive categories. To capture the context, we expanded the coding unit beyond the initial tweet. The coders read the 5 tweets that preceded the tweet and the 5 tweets that followed the tweet. They also reviewed the images or webpages linked to the tweet. The coders achieved reliability by using a separate training data set of tweets, which was created through the same procedure used for the sample data set. We used a training set to train the coders without depleting the main data set. During training, the coders reconciled the differences in the code interpretations and coding approaches as a team. After the coding schema was finalized, the 4 coders achieved intercoder reliability on key constructs assessed with an intraclass correlation at 0.80 or higher. Approximately 20% (500/2517) of the final data set was coded by at least two coders.</p>
      </sec>
      <sec>
        <title>Measures</title>
        <p>The HIV incidence—the outcome variable—was assessed as the number of new cases of HIV infections in a given county in 2017. These data were sourced from the Centers for Disease Control and Prevention AtlasPlus data platform [<xref ref-type="bibr" rid="ref23">23</xref>]. Counties with suppressed data owing to low case counts were assigned a value of 2, which represented the midpoint between the lowest possible suppressed value of 1 and the highest possible suppressed value of 4.</p>
        <p>Twitter messages were classified into the following 3 categories in 2016: risk-specific Twitter activity, prevention-specific Twitter activity, and HIV-specific Twitter activity. Risk-specific Twitter activity is the sum of all the tweets categorized with a risk-related code (eg, multiple partners, pro risk-taking, substance use, transactional sex, and unprotected sex) in a given county, per 100,000 residents. Prevention-specific Twitter activity is the sum of all the tweets categorized as prevention-related (eg, antirisk taking, condoms, HIV testing, HIV/AIDS, PrEP, research, education, and news) in a given county, per 100,000 residents. HIV-specific Twitter activity is the sum of the risk-specific and prevention-specific Twitter activities, in addition to the tweets that were tagged as related to LGBTQ content. All users in our data set were identified as either an individual or an institution based on the manual review of the user profile and recent posting activity. Institutions included public health agencies, social service organizations, and advocacy groups, and typically included the organization name in the username or user description. Our final measures of the tweets consisted of 3 Twitter activity categories (risk, prevention, or HIV-specific) from individuals or institutions, resulting in 6 Twitter variables.</p>
        <p>We accounted for 3 geographic control variables: HIV prevalence, social disadvantage, and census region. HIV prevalence in a geographic area is the key epidemiological factor linked to the number of new cases in that area [<xref ref-type="bibr" rid="ref24">24</xref>]. We used county-level HIV prevalence rates in 2015 to account for the existing patterns of HIV infection. Counties with suppressed data owing to low case counts were assigned case counts of 6, which represented the midpoint between the lowest possible suppressed value of 1 and the highest possible suppressed value of 11. Studies have also shown that socioeconomic factors measured at the city-wide level are the key drivers of new HIV infections [<xref ref-type="bibr" rid="ref25">25</xref>]. To capture the combined effect of multiple dimensions of socioeconomic disadvantage, we calculated the social disadvantage index at the county level for the counties in our study (Cronbach α=.82) [<xref ref-type="bibr" rid="ref26">26</xref>]. This index was calculated by summing the z-scores for the percentage of the population living in poverty, the percentage of the population with a high school degree or equivalent, the median household income, and the percentage of the population lacking health insurance. These measures were obtained from the US Census Bureau Small Area Income and Poverty Estimates and the American Community Survey [<xref ref-type="bibr" rid="ref27">27</xref>]. Negative weights were applied to high school education and median income, yielding an index that reflected greater social disadvantage for high values of the index and lesser social disadvantage for low values of the index. Census region was included to account for the regional variations in the HIV epidemic. The four regions, that is, northeast, south, Midwest, and west regions of the United States, were treated as the control variables in the models.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>General sex-related tweets were given sample weights of 6.25 for all the analyses to reflect the random samplings performed to reduce the data for coding. We used the Wilcoxon rank sum test with continuity correction for large samples to compare county tweet outputs based on the message source (ie, tweets from individuals vs institutions). We used negative binomial regression to estimate the effects of Twitter activity on HIV incidence at the county level. Our outcome of interest for this analysis was the rate of new diagnoses of HIV infections per capita at the county level. To model this rate variable, we included an offset term for the county population in 2017 in our regression analysis [<xref ref-type="bibr" rid="ref28">28</xref>]. Negative binomial regression was chosen because our county outcome variables showed significant overdispersion from the Poisson distribution. Unadjusted models were run first for each of the 6 Twitter variables and the 3 control variables. Separate multivariate models were run for each of the Twitter variables, thereby adjusting for all the control variables. Variance inflation factors were examined for all final models, and none showed evidence of multicollinearity. Analyses were performed in R-3.5.1 [<xref ref-type="bibr" rid="ref29">29</xref>] using the MASS package [<xref ref-type="bibr" rid="ref30">30</xref>] glm.nb() function for negative binomial regression.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Descriptive and Geospatial Data</title>
        <p>Our data included 2517 tweets that were identified as potentially relevant to HIV risk (eg, unprotected sex) and prevention tags (eg, condom use, HIV testing, research, education), and these tweets originated in 109 counties across the United States. Of these, 940 were general sex-related tweets (including emojis, gifs, and images) and were given a sample weight to reflect our random sampling procedure. Each tweet in our data set represents 100 tweets in the real world as our data was drawn from 1% of publicly available tweets. However, we have reported all our results in units of true tweets, which were calculated by multiplying our results by 100. In 2016, 321 HIV-specific tweets, on average, originated from individuals in each county. Counties had an average of 143 prevention-related and 118 risk-related tweets from individuals. An average of 944 HIV-specific tweets, 843 prevention-related tweets, and 31 risk-related tweets originating from institutions were sourced to each county. Institutions tweeted significantly more HIV-related (U=67,812; <italic>P&#60;</italic>.001) and prevention-related messages (U=62,711; <italic>P&#60;</italic>.001) and significantly fewer risk-related messages as compared to individuals (U=63,879; <italic>P&#60;</italic>.001). Within counties that had at least one potentially relevant tweet, the median number of new HIV cases diagnosed in 2017 was 70 per county (range: 0-1530). HIV prevalence in these counties ranged from 6.02 to 2590 per 100,000 residents, with a median prevalence rate of 306 per 100,000 residents. The social disadvantage index ranged from –6.52 to 7.53 (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <p>The crude incidence rate ratios (IRRs) for each variable of interest of HIV incidence in 2017 were calculated using negative binomial regression with an offset for the county population (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <p>HIV incidence in 2017 was positively associated with HIV prevalence in 2015 and social disadvantage index in 2015 (IRR 1.104, 95% CI 1.075-1.134; <italic>P&#60;</italic>.001). Compared to that in the Midwest region, significantly higher HIV incidence was observed in the northeast (IRR 1.286, 95% CI 0.985-1.683; <italic>P&#60;</italic>.001) and south (IRR 2.126, 95% CI 1.711-2.630; <italic>P&#60;</italic>.001) regions of the United States. We did not observe a significant difference (IRR 0.967, 95% CI 0.749-1.250; <italic>P&#62;</italic>.99) in the counties in the west region of the United States. The large number of prevention tweets from individuals in 2016 was significantly associated with the high incidence of HIV in the following year (IRR 1.082, 95% CI 1.003-1.183; <italic>P</italic>=.048). No other significant bivariate associations were found between HIV-related tweets and HIV incidence for combinations of tweet category and user type.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Descriptive statistics at the county level (n=109).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="250"/>
            <col width="350"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Values</td>
              </tr>
              <tr valign="top">
                <td>Descriptive statistics</td>
                <td>Mean (SD)</td>
                <td>Median (Min, Max)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>HIV incidence case count, 2017</td>
                <td>173 (260)</td>
                <td>70 (0.00, 1530)</td>
              </tr>
              <tr valign="top">
                <td>HIV prevalence case count, 2015</td>
                <td>484 (500)</td>
                <td>306 (6.02, 2590)</td>
              </tr>
              <tr valign="top">
                <td>County population, 2017</td>
                <td>832,000 (1,090,000)</td>
                <td>535,000 (13,900, 8,580,000)</td>
              </tr>
              <tr valign="top">
                <td>Social disadvantage index, 2015</td>
                <td>0.251 (3.06)</td>
                <td>0.463 (–6.52, 7.53)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Crude incidence rate ratios (bivariate models).<sup>a</sup></p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="390"/>
            <col width="0"/>
            <col width="310"/>
            <col width="0"/>
            <col width="80"/>
            <col width="0"/>
            <col width="90"/>
            <col width="0"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Parameters</td>
                <td colspan="2">Crude incidence rate ratio</td>
                <td colspan="4">95% CI</td>
                <td colspan="2"><italic>P</italic> value</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">Lower</td>
                <td colspan="3">Upper</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">HIV tweets 2016, person</td>
                <td colspan="2">1.006</td>
                <td colspan="2">0.975</td>
                <td colspan="2">1.043</td>
                <td colspan="2">.65</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Prevention 2016, person</td>
                <td colspan="2">1.082</td>
                <td colspan="2">1.003</td>
                <td colspan="2">1.183</td>
                <td colspan="2">.048</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Risk tweets 2016, person</td>
                <td colspan="2">0.976</td>
                <td colspan="2">0.931</td>
                <td colspan="2">1.024</td>
                <td colspan="2">.23</td>
              </tr>
              <tr valign="top">
                <td colspan="2">HIV tweets 2016, institution</td>
                <td colspan="2">1.006</td>
                <td colspan="2">0.998</td>
                <td colspan="2">1.016</td>
                <td colspan="2">.13</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Prevention tweets 2016, institution</td>
                <td colspan="2">1.006</td>
                <td colspan="2">0.997</td>
                <td colspan="2">1.018</td>
                <td colspan="2">.16</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Risk tweets 2016, institution</td>
                <td colspan="2">1.155</td>
                <td colspan="2">0.876</td>
                <td colspan="2">1.651</td>
                <td colspan="2">.30</td>
              </tr>
              <tr valign="top">
                <td colspan="2">HIV prevalence, 2015</td>
                <td colspan="2">1.002</td>
                <td colspan="2">1.001</td>
                <td colspan="2">1.002</td>
                <td colspan="2">&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Social disadvantage index</td>
                <td colspan="2">1.104</td>
                <td colspan="2">1.075</td>
                <td colspan="2">1.134</td>
                <td colspan="2">&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Region of the United States</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Midwest</td>
                <td colspan="2">Ref<sup>b</sup></td>
                <td colspan="2">Ref</td>
                <td colspan="2">Ref</td>
                <td>Ref</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Northeast</td>
                <td colspan="2">1.286</td>
                <td colspan="2">0.985</td>
                <td colspan="2">1.683</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">South</td>
                <td colspan="2">2.126</td>
                <td colspan="2">1.711</td>
                <td colspan="2">2.630</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">West</td>
                <td colspan="2">0.967</td>
                <td colspan="2">0.749</td>
                <td colspan="2">1.250</td>
                <td>&#62;.99</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>All tweet variables are reported in units of 100 tweets.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Ref: reference.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Multivariate Analyses</title>
        <p>Multivariate models were used to test the adjusted effects for each of the 3 categories of tweets, for individuals and institutions separately, on HIV incidence in the following year. These models are summarized in <xref ref-type="table" rid="table3">Tables 3</xref> and <xref ref-type="table" rid="table4">4</xref>.</p>
        <p>In all 6 models, HIV incidence in 2017 was positively associated with HIV prevalence in 2015 and social disadvantage index. Additionally, all 6 models showed a significant difference in HIV incidence between the South and Midwest regions. Only one model, Model 1, showed a statistically significant effect for a tweet variable on HIV incidence. In Model 1, HIV-specific tweets originating from individuals were negatively associated with HIV incidence at the county level in the following year, after adjusting for region, HIV prevalence, and social disadvantage index. Each additional 100 HIV-specific tweets per capita that originated from an individual in a given county was associated with a 3% decrease in the incidence rate of HIV in the following year, after adjusting for covariates.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Multivariate models for tweets from individuals.<sup>a</sup></p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="130"/>
            <col width="210"/>
            <col width="70"/>
            <col width="210"/>
            <col width="70"/>
            <col width="210"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="6">HIV incidence per capita, 2017</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">Model 1: HIV-specific tweets</td>
                <td colspan="2">Model 2: Prevention-specific tweets</td>
                <td colspan="2">Model 3: Risk-specific tweets</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Predictors</td>
                <td>Incidence rate ratio (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>Incidence rate ratio (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>Incidence rate ratio (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">HIV-specific Twitter activity, 2016</td>
                <td>0.97 (0.94-1.00)</td>
                <td>.04</td>
                <td>N/A<sup>b</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Prevention-specific Twitter activity, 2016</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>0.95 (0.90-1.01)</td>
                <td>.13</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Risk-specific Twitter activity, 2016</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>1.03 (0.86-1.24)</td>
                <td>.73</td>
              </tr>
              <tr valign="top">
                <td colspan="2">HIV prevalence, 2015</td>
                <td>1.00 (1.00-1.00)</td>
                <td>&#60;.001</td>
                <td>1.00 (1.00-1.00)</td>
                <td>&#60;.001</td>
                <td>1.00 (1.00-1.00)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Social disadvantage index</td>
                <td>1.04 (1.02-1.06)</td>
                <td>&#60;.001</td>
                <td>1.04 (1.02-1.06)</td>
                <td>&#60;.001</td>
                <td>1.04 (1.02-1.06)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Census region</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Midwest</td>
                <td>Ref<sup>c</sup></td>
                <td>Ref</td>
                <td>Ref</td>
                <td>Ref</td>
                <td>Ref</td>
                <td>Ref</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Northeast</td>
                <td>0.90 (0.73-1.10)</td>
                <td>.30</td>
                <td>0.90 (0.74-1.10)</td>
                <td>.29</td>
                <td>0.90 (0.73-1.11)</td>
                <td>.33</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>South</td>
                <td>1.40 (1.18-1.67)</td>
                <td>&#60;.001</td>
                <td>1.41 (1.20-1.67)</td>
                <td>&#60;.001</td>
                <td>1.41 (1.18-1.68)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>West</td>
                <td>0.94 (0.78-1.15)</td>
                <td>.55</td>
                <td>0.95 (0.79-1.14)</td>
                <td>.56</td>
                <td>0.94 (0.77-1.14)</td>
                <td>.51</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>All tweet variables are reported in units of 100 tweets.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>Ref: reference.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Multivariate models for tweets from institutions.<sup>a</sup></p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="130"/>
            <col width="210"/>
            <col width="70"/>
            <col width="210"/>
            <col width="70"/>
            <col width="210"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="6">HIV incidence per capita, 2017</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">Model 4: HIV-specific tweets</td>
                <td colspan="2">Model 5: Prevention-specific tweets</td>
                <td colspan="2">Model 6: Risk-specific tweets</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Predictors</td>
                <td>Incidence rate ratio (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>Incidence rate ratio (95% CI)</td>
                <td><italic>P</italic> value</td>
                <td>Incidence rate ratio (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">HIV-specific Twitter activity, 2016</td>
                <td>1.00 (0.99-1.00)</td>
                <td>.92</td>
                <td>N/A<sup>b</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Prevention-specific Twitter activity, 2016</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>1.00 (0.99-1.01)</td>
                <td>.996</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Risk-specific Twitter activity, 2016</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>1.03 (0.86-1.24)</td>
                <td>.73</td>
              </tr>
              <tr valign="top">
                <td colspan="2">HIV prevalence, 2015</td>
                <td>1.00 (1.00-1.00)</td>
                <td>&#60;.001</td>
                <td>1.00 (1.00-1.00)</td>
                <td>&#60;.001</td>
                <td>1.00 (1.00-1.00)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Social disadvantage index</td>
                <td>1.04 (1.02-1.06)</td>
                <td>&#60;.001</td>
                <td>1.04 (1.02-1.06)</td>
                <td>&#60;.001</td>
                <td>1.04 (1.02-1.06)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Census region</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Midwest</td>
                <td>Ref<sup>c</sup></td>
                <td>Ref</td>
                <td>Ref</td>
                <td>Ref</td>
                <td>Ref</td>
                <td>Ref</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Northeast</td>
                <td>0.90 (0.73-1.11)</td>
                <td>.34</td>
                <td>0.90 (0.73-1.11)</td>
                <td>.34</td>
                <td>0.90 (0.73-1.11)</td>
                <td>.33</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>South</td>
                <td>1.41 (1.18-1.68)</td>
                <td>&#60;.001</td>
                <td>1.41 (1.18-1.68)</td>
                <td>&#60;.001</td>
                <td>1.41 (1.18-1.68)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>West</td>
                <td>0.94 (0.77-1.14)</td>
                <td>.51</td>
                <td>0.94 (0.77-1.14)</td>
                <td>.51</td>
                <td>0.94 (0.77-1.14)</td>
                <td>.51</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>All tweet variables are reported in units of 100 tweets.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>Ref: reference.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we analyzed the association between geolocated HIV-related tweets within the United States and the future incidence of HIV infection. HIV-specific tweets were more likely to emerge in those locations in the United States that had a high incidence of HIV. The number of HIV-specific tweets made by institution-associated accounts was higher than that of individual tweets. Interestingly, institution-associated tweets contained less risk-related information than tweets made by individual users. However, we did not observe significant associations between the number of HIV-specific tweets made by institutions and county-level HIV incidence. In contrast, increased numbers of HIV-specific tweets made by individual users were significantly associated with a decreased number of HIV cases in the following year at the county level, even when controlling for the geographic location. These findings suggest that the source of the tweet plays an important role, with individuals tweeting less about prevention, and these individual tweets showed a strong association with the future outcomes of HIV infections.</p>
        <p>Geolocated conversations regarding HIV infections were negatively associated with county-level HIV incidence. These findings suggest that locations with few HIV-related Twitter posts and conversations by individuals may indicate those that require targeted interventions. Thus, counties with high incidence of HIV infections and few tweets may indicate an opportunity for increased investigation and potential intervention.</p>
        <p>There are several possible reasons for our observation of low incidence of HIV infections in counties with large numbers of HIV-related tweets in the previous year. Increased numbers of HIV-related tweets at the county level could indicate increased community involvement, policy initiatives, and resource utilization in a given county [<xref ref-type="bibr" rid="ref31">31</xref>]. Additionally, increased numbers of HIV-related tweets by individuals could reflect increased activities in addressing various determinants of HIV risks, including limited institutional support and reduced access to health care [<xref ref-type="bibr" rid="ref32">32</xref>]. Although studies have sought to incorporate real-time analysis of Twitter data in association with localized HIV incidence, up-to-date HIV epidemiological data is limited or inaccessible to researchers. It is critical that public health professionals and computer scientists collaborate to develop novel approaches in analyzing Twitter data in accordance with the available HIV epidemiological data.</p>
        <p>Our findings corroborate those of Ireland et al [<xref ref-type="bibr" rid="ref15">15</xref>] but they are in contrast to those reported by Young et al [<xref ref-type="bibr" rid="ref8">8</xref>] who found a positive association between HIV-related tweets and HIV prevalence. Although our study was similar in concept to that conducted by Young et al [<xref ref-type="bibr" rid="ref8">8</xref>], we used a more specific definition of HIV-related tweets by excluding keywords that were less sensitive in our training sample (eg, “fuck”) and including a variety of slang terms that were compiled by our young researchers. Moreover, our study may have some slight differences from that of Young et al [<xref ref-type="bibr" rid="ref8">8</xref>] because of the time period in our study—we may have identified a more recent phenomenon in the prevalence of HIV infections. Two previous studies [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>] analyzed the association between the HIV-related tweets and the corresponding epidemiological data in the same time period as considered in our study, whereas Young et al [<xref ref-type="bibr" rid="ref8">8</xref>] analyzed the Twitter data with the epidemiological data of the previous year. The analyses in this study mirror those reported by Young et al [<xref ref-type="bibr" rid="ref8">8</xref>] because we also analyzed the epidemiological data from the year after obtaining our specified frame of tweets in 2016.</p>
        <p>Our findings suggest that discourses on HIV and risky sexual behavior on Twitter may serve as a signal of sexual health outcomes at the aggregate level. However, the low effect size and nonsignificant results of some of the models make it difficult to state this fact conclusively. It is clear that HIV-related discourse is geographically concentrated, and in coordination with epidemiological surveillance efforts, it may be used to inform intervention efforts. Despite the relative rarity of direct discussion of HIV on Twitter, this social media platform is still an important medium for conversations regarding HIV and health behaviors. Given its wide user base, Twitter can serve as a platform for discussing useful HIV prevention strategies, and such platforms deserve further investment as tools to end the HIV epidemic.</p>
        <p>This study has several strengths. Our analysis combined NLP and manual coding, thereby allowing for coding of a large number of tweets for in-depth meaning, while preserving context. Our use of geolocated tweets allowed for location-based analysis with epidemiological and census data. These aspects of our study allowed for contextualized analysis of Twitter data, which may be useful for targeted interventions across the United States.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has several limitations. First, we did not adjust for multiple comparisons when evaluating model significance. Our study only used a set of geolocated tweets from 2016, which greatly reduced the available sample. This analysis included 10.8% (339/3141) of all the counties and there may have been different Twitter discourses in other regions that were not included in this analysis. It is possible that tweets that were not geolocated in that year could have revealed additional information about the nature of HIV-specific tweets relative to HIV incidence. Second, we excluded Spanish tweets, which limited our ability to capture the web-based discourse among Latino men. Third, since we focused on deidentified Twitter data, our study does not contain information on individual characteristics or behaviors.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>With the increase of public discourse through Twitter, public health efforts leveraging this social medium are needed. Social media platforms such as Twitter offer an opportunity for health professionals to monitor population health and promote HIV disease prevention. We observed a negative association between HIV-specific tweets made by individual users and HIV incidence in the following calendar year at the county level. Our study underscores the importance of social media as a crucial aspect in the lives of individuals, as these discourses might unearth the youths’ knowledge, attitudes, and beliefs related to HIV. Public health efforts seeking to use social media as a tool for HIV surveillance and intervention are warranted.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">IRR</term>
          <def>
            <p>incidence rate ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">PrEP</term>
          <def>
            <p>pre-exposure prophylaxis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">STI</term>
          <def>
            <p>sexually transmitted infection</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Institutes of Health CFAR administrative supplement and the University of Pennsylvania Center of AIDS HIV Disparities Scientific Working Group. The authors would like to acknowledge Genevieve Afrifa, Obumneke Obi, Chinanuekpere Emma Ezeala, and Rashaan Galloway for their brilliant coding assistance.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Centers for Disease Control and Prevention</collab>
          </person-group>
          <source>https://www.cdc.gov/hiv/pdf/library/reports/surveillance/cdc-hiv-surveillance-report-2017-vol-29.pdf</source>
          <year>2017</year>
          <access-date>2018-05-08</access-date>
          <publisher-loc>Atlanta, GA</publisher-loc>
          <publisher-name>Centers for Disease Control and Prevention</publisher-name>
          <comment>HIV Surveillance Report, 2017<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/hiv/pdf/library/reports/surveillance/cdc-hiv-surveillance-report-2017-vol-29.pdf">https://www.cdc.gov/hiv/pdf/library/reports/surveillance/cdc-hiv-surveillance-report-2017-vol-29.pdf</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>Aaron</given-names>
            </name>
          </person-group>
          <source>https://www.pewresearch.org/internet/2018/03/01/social-media-use-in-2018/</source>
          <year>2018</year>
          <access-date>2019-01-31</access-date>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>Pew Research Center</publisher-name>
          <comment>Social media use in 2018<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2018/03/01/social-media-use-in-2018/">https://www.pewresearch.org/internet/2018/03/01/social-media-use-in-2018/</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perrin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Facttank: News by the Numbers</source>
          <year>2019</year>
          <month>4</month>
          <day>9</day>
          <access-date>2019-06-05</access-date>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>Pew Research Center</publisher-name>
          <comment>Share of U.S. adults using social media, including Facebook, is mostly unchanged since 2018<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/fact-tank/2019/04/10/share-of-u-s-adults-using-social-media-including-facebook-is-mostly-unchanged-since-2018/">https://www.pewresearch.org/fact-tank/2019/04/10/share-of-u-s-adults-using-social-media-including-facebook-is-mostly-unchanged-since-2018/</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cabrera-Nguyen</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bierut</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Moreno</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Young Adults' Exposure to Alcohol- and Marijuana-Related Content on Twitter</article-title>
          <source>J Stud Alcohol Drugs</source>
          <year>2016</year>
          <month>03</month>
          <volume>77</volume>
          <issue>2</issue>
          <fpage>349</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26997194"/>
          </comment>
          <pub-id pub-id-type="doi">10.15288/jsad.2016.77.349</pub-id>
          <pub-id pub-id-type="medline">26997194</pub-id>
          <pub-id pub-id-type="pmcid">PMC4803667</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Salyer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grucza</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Bierut</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>Twitter chatter about marijuana</article-title>
          <source>J Adolesc Health</source>
          <year>2015</year>
          <month>02</month>
          <volume>56</volume>
          <issue>2</issue>
          <fpage>139</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25620299"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jadohealth.2014.10.270</pub-id>
          <pub-id pub-id-type="medline">25620299</pub-id>
          <pub-id pub-id-type="pii">S1054-139X(14)00703-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4306811</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gabarron</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Serrano</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Wynn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>AY</given-names>
            </name>
          </person-group>
          <article-title>Tweet content related to sexually transmitted diseases: no joking matter</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>10</month>
          <day>06</day>
          <volume>16</volume>
          <issue>10</issue>
          <fpage>e228</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/10/e228/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3259</pub-id>
          <pub-id pub-id-type="medline">25289463</pub-id>
          <pub-id pub-id-type="pii">v16i10e228</pub-id>
          <pub-id pub-id-type="pmcid">PMC4210955</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kreniske</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Developing a culture of commenting in a first-year seminar</article-title>
          <source>Computers in Human Behavior</source>
          <year>2017</year>
          <month>07</month>
          <volume>72</volume>
          <fpage>724</fpage>
          <lpage>732</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2016.09.060</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Rivers</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Methods of using real-time social media technologies for detection and remote monitoring of HIV outcomes</article-title>
          <source>Prev Med</source>
          <year>2014</year>
          <month>06</month>
          <volume>63</volume>
          <fpage>112</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24513169"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ypmed.2014.01.024</pub-id>
          <pub-id pub-id-type="medline">24513169</pub-id>
          <pub-id pub-id-type="pii">S0091-7435(14)00055-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC4031268</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>AlKulaib</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Benton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Weaponized Health Communication: Twitter Bots and Russian Trolls Amplify the Vaccine Debate</article-title>
          <source>Am J Public Health</source>
          <year>2018</year>
          <month>10</month>
          <volume>108</volume>
          <issue>10</issue>
          <fpage>1378</fpage>
          <lpage>1384</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2018.304567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>WMP</given-names>
            </name>
          </person-group>
          <article-title>Addressing Health-Related Misinformation on Social Media</article-title>
          <source>JAMA</source>
          <year>2018</year>
          <month>12</month>
          <day>18</day>
          <volume>320</volume>
          <issue>23</issue>
          <fpage>2417</fpage>
          <lpage>2418</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2018.16865</pub-id>
          <pub-id pub-id-type="medline">30428002</pub-id>
          <pub-id pub-id-type="pii">2715795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charles-Smith</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Reynolds</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>EHY</given-names>
            </name>
            <name name-style="western">
              <surname>Olsen</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlin</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Shigematsu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Streichert</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Suda</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Corley</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Using Social Media for Actionable Disease Surveillance and Outbreak Management: A Systematic Literature Review</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>10</issue>
          <fpage>e0139701</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0139701"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0139701</pub-id>
          <pub-id pub-id-type="medline">26437454</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-21599</pub-id>
          <pub-id pub-id-type="pmcid">PMC4593536</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinnenberg</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Buttenheim</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Padrez</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mancheno</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Twitter as a Tool for Health Research: A Systematic Review</article-title>
          <source>Am J Public Health</source>
          <year>2017</year>
          <month>01</month>
          <volume>107</volume>
          <issue>1</issue>
          <fpage>e1</fpage>
          <lpage>e8</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2016.303512</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kalyanam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Katsuki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lanckriet</surname>
              <given-names>GRG</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
          </person-group>
          <article-title>Exploring trends of nonmedical use of prescription drugs and polydrug abuse in the Twittersphere using unsupervised machine learning</article-title>
          <source>Addict Behav</source>
          <year>2017</year>
          <month>02</month>
          <volume>65</volume>
          <fpage>289</fpage>
          <lpage>295</lpage>
          <pub-id pub-id-type="doi">10.1016/j.addbeh.2016.08.019</pub-id>
          <pub-id pub-id-type="medline">27568339</pub-id>
          <pub-id pub-id-type="pii">S0306-4603(16)30299-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ireland</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Albarracín</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Action Tweets Linked to Reduced County-Level HIV Prevalence in the United States: Online Messages and Structural Determinants</article-title>
          <source>AIDS Behav</source>
          <year>2016</year>
          <month>06</month>
          <volume>20</volume>
          <issue>6</issue>
          <fpage>1256</fpage>
          <lpage>64</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26650382"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10461-015-1252-2</pub-id>
          <pub-id pub-id-type="medline">26650382</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10461-015-1252-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4867271</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ireland</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Albarracín</surname>
              <given-names>Dolores</given-names>
            </name>
          </person-group>
          <article-title>Future-oriented tweets predict lower county-level HIV prevalence in the United States</article-title>
          <source>Health Psychol</source>
          <year>2015</year>
          <month>12</month>
          <volume>34S</volume>
          <fpage>1252</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26651466"/>
          </comment>
          <pub-id pub-id-type="doi">10.1037/hea0000279</pub-id>
          <pub-id pub-id-type="medline">26651466</pub-id>
          <pub-id pub-id-type="pii">2015-56045-006</pub-id>
          <pub-id pub-id-type="pmcid">PMC5621637</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Big data. The parable of Google Flu: traps in big data analysis</article-title>
          <source>Science</source>
          <year>2014</year>
          <month>03</month>
          <day>14</day>
          <volume>343</volume>
          <issue>6176</issue>
          <fpage>1203</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1126/science.1248506</pub-id>
          <pub-id pub-id-type="medline">24626916</pub-id>
          <pub-id pub-id-type="pii">343/6176/1203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Eichstaedt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dziurzynski</surname>
              <given-names>Lukasz</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>Richard E.</given-names>
            </name>
            <name name-style="western">
              <surname>Agrawal</surname>
              <given-names>Megha</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Gregory J</given-names>
            </name>
            <name name-style="western">
              <surname>Lakshmikanth</surname>
              <given-names>Shrinidhi K.</given-names>
            </name>
            <name name-style="western">
              <surname>Jha</surname>
              <given-names>Sneha</given-names>
            </name>
            <name name-style="western">
              <surname>Seligman</surname>
              <given-names>Martin E. P.</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>Lyle</given-names>
            </name>
          </person-group>
          <article-title>Characterizing Geographic Variation in Well-Being Using Tweets</article-title>
          <source>Proceedings of the 7th International AAAI Conference on Web and Social Media</source>
          <year>2013</year>
          <conf-name>7th International AAAI Conference on Web and Social Media</conf-name>
          <conf-date>2013-07-08</conf-date>
          <conf-loc>Cambridge, MA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aaai.org/ocs/index.php/ICWSM/ICWSM13/paper/view/6138"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Giorgi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sap</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Crutchley</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Eichstaedt</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>DLATK: Differential language analysis ToolKit</article-title>
          <source>Proceedings of the 2017 EMNLP System Demonstrations</source>
          <year>2017</year>
          <month>9</month>
          <day>7</day>
          <conf-name>EMNLP Systems Demonstrations</conf-name>
          <conf-date>2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <fpage>55</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://wwbp.org/papers/DLATK_Differential_Language_Analysis_ToolKit.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d17-2010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lui</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Baldwin</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>langid.py: an off-the-shelf language identification tool</article-title>
          <source>Proceedings of the ACL 2012 System Demonstrations</source>
          <year>2012</year>
          <month>7</month>
          <day>1</day>
          <conf-name>Association for Computational Linguistics '12</conf-name>
          <conf-date>2012-7-1</conf-date>
          <conf-loc>Stroudsburg, PA</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>25</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/2390470.2390475"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sap</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Eichstaedt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stillwell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kosinski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
          </person-group>
          <article-title>Developing Age and Gender Predictive Lexica over Social Media</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <year>2014</year>
          <conf-name>Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>1146</fpage>
          <lpage>1151</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://emnlp2014.org/papers/pdf/EMNLP2014121.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Preoţiuc-Pietro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>User-Level Race and Ethnicity Predictors from Twitter Text</article-title>
          <year>2018</year>
          <conf-name>Proceedings of the 27th International Conference on Computational Linguistics</conf-name>
          <conf-date>2018</conf-date>
          <conf-loc>Santa Fe, NM</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/C18-1130/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Rivers</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Methods of using real-time social media technologies for detection and remote monitoring of HIV outcomes</article-title>
          <source>Prev Med</source>
          <year>2014</year>
          <month>06</month>
          <volume>63</volume>
          <fpage>112</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24513169"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ypmed.2014.01.024</pub-id>
          <pub-id pub-id-type="medline">24513169</pub-id>
          <pub-id pub-id-type="pii">S0091-7435(14)00055-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC4031268</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Centers for Disease Control and Prevention</collab>
          </person-group>
          <source>https://www.cdc.gov/nchhstp/atlas/index.htm</source>
          <year>2017</year>
          <access-date>2018-02-02</access-date>
          <publisher-loc>Atlanta, GA</publisher-loc>
          <publisher-name>Centers for Disease Control and Prevention</publisher-name>
          <comment>NCHHSTP AtlasPlus<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/nchhstp/atlas/index.htm">https://www.cdc.gov/nchhstp/atlas/index.htm</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hallett</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Zaba</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Todd</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lopman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mwita</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Biraro</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gregson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Boerma</surname>
              <given-names>JT</given-names>
            </name>
            <collab>ALPHA Network</collab>
          </person-group>
          <article-title>Estimating incidence from prevalence in generalised HIV epidemics: methods and validation</article-title>
          <source>PLoS Med</source>
          <year>2008</year>
          <month>04</month>
          <day>08</day>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>e80</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pmed.0050080"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.0050080</pub-id>
          <pub-id pub-id-type="medline">18590346</pub-id>
          <pub-id pub-id-type="pii">07-PLME-RA-0491</pub-id>
          <pub-id pub-id-type="pmcid">PMC2288620</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buot</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Docena</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Ratemo</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Bittner</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Burlew</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Nuritdinov</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Beyond race and place: distal sociological determinants of HIV disparities</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>4</issue>
          <fpage>e91711</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0091711"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0091711</pub-id>
          <pub-id pub-id-type="medline">24743728</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-37521</pub-id>
          <pub-id pub-id-type="pmcid">PMC3990614</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bauermeister</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Connochie</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Eaton</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Demers</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stephenson</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Geospatial Indicators of Space and Place: A Review of Multilevel Studies of HIV Prevention and Care Outcomes Among Young Men Who Have Sex With Men in the United States</article-title>
          <source>J Sex Res</source>
          <year>2017</year>
          <volume>54</volume>
          <issue>4-5</issue>
          <fpage>446</fpage>
          <lpage>464</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28135857"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/00224499.2016.1271862</pub-id>
          <pub-id pub-id-type="medline">28135857</pub-id>
          <pub-id pub-id-type="pmcid">PMC5623108</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <source>US Census Bureau, American Community Survey, 2016 American Community Survey 1-year estimates</source>
          <access-date>2019-02-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://data.census.gov/cedsci">http://data.census.gov/cedsci</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agresti</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>An introduction to categorical data analysis</source>
          <year>2018</year>
          <publisher-loc>Hoboken, New Jersey</publisher-loc>
          <publisher-name>Wiley</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>R Foundation</collab>
          </person-group>
          <source>R: A language and environment for statistical computing [computer program]</source>
          <year>2013</year>
          <access-date>2020-05-20</access-date>
          <publisher-loc>Vienna, Austria</publisher-loc>
          <publisher-name>R Foundation for Statistical Computing</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.r-project.org/">https://www.r-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Venables</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ripley</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>Modern Applied Statistics with S</source>
          <year>2002</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noguchi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Handley</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Albarracín</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Participating in politics resembles physical activity: general action patterns in international archives, United States archives, and experiments</article-title>
          <source>Psychol Sci</source>
          <year>2011</year>
          <month>02</month>
          <volume>22</volume>
          <issue>2</issue>
          <fpage>235</fpage>
          <lpage>242</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21177515"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0956797610393746</pub-id>
          <pub-id pub-id-type="medline">21177515</pub-id>
          <pub-id pub-id-type="pii">0956797610393746</pub-id>
          <pub-id pub-id-type="pmcid">PMC4793907</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wallerstein</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Duran</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Community-Based Participatory Research Contributions to Intervention Research: The Intersection of Science and Practice to Improve Health Equity</article-title>
          <source>Am J Public Health</source>
          <year>2010</year>
          <month>04</month>
          <volume>100</volume>
          <issue>S1</issue>
          <fpage>S40</fpage>
          <lpage>S46</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2009.184036</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
