<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i4e22042</article-id>
      <article-id pub-id-type="pmid">33900200</article-id>
      <article-id pub-id-type="doi">10.2196/22042</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Leveraging Social Media Activity and Machine Learning for HIV and Substance Abuse Risk Assessment: Development and Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chang</surname>
            <given-names>Angela</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pradhan</surname>
            <given-names>Meeta</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Ovalle</surname>
            <given-names>Anaelia</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>University of California Los Angeles</institution>
            <addr-line>Engineering VI</addr-line>
            <addr-line>404 Westwood Plaza</addr-line>
            <addr-line>Los Angeles, CA, 90095</addr-line>
            <country>United States</country>
            <phone>1 3108253886</phone>
            <email>anaelia@cs.ucla.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0531-7520</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Goldstein</surname>
            <given-names>Orpaz</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9764-1618</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Kachuee</surname>
            <given-names>Mohammad</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0099-3466</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Elizabeth S C</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3015-8795</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Hong</surname>
            <given-names>Chenglin</given-names>
          </name>
          <degrees>BSW, MSW, MPH</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9652-388X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Holloway</surname>
            <given-names>Ian W</given-names>
          </name>
          <degrees>LCSW, MPH, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7454-8632</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Sarrafzadeh</surname>
            <given-names>Majid</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8407-8689</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>University of California Los Angeles</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Social Welfare</institution>
        <institution>University of California Los Angeles</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Anaelia Ovalle <email>anaelia@cs.ucla.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>4</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>26</day>
        <month>4</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>4</issue>
      <elocation-id>e22042</elocation-id>
      <history>
        <date date-type="received">
          <day>4</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>9</day>
          <month>10</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>25</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>31</day>
          <month>1</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Anaelia Ovalle, Orpaz Goldstein, Mohammad Kachuee, Elizabeth S C Wu, Chenglin Hong, Ian W Holloway, Majid Sarrafzadeh. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 26.04.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/4/e22042" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Social media networks provide an abundance of diverse information that can be leveraged for data-driven applications across various social and physical sciences. One opportunity to utilize such data exists in the public health domain, where data collection is often constrained by organizational funding and limited user adoption. Furthermore, the efficacy of health interventions is often based on self-reported data, which are not always reliable. Health-promotion strategies for communities facing multiple vulnerabilities, such as men who have sex with men, can benefit from an automated system that not only determines health behavior risk but also suggests appropriate intervention targets.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to determine the value of leveraging social media messages to identify health risk behavior for men who have sex with men.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The Gay Social Networking Analysis Program was created as a preliminary framework for intelligent web-based health-promotion intervention. The program consisted of a data collection system that automatically gathered social media data, health questionnaires, and clinical results for sexually transmitted diseases and drug tests across 51 participants over 3 months. Machine learning techniques were utilized to assess the relationship between social media messages and participants' offline sexual health and substance use biological outcomes. The F1 score, a weighted average of precision and recall, was used to evaluate each algorithm. Natural language processing techniques were employed to create health behavior risk scores from participant messages.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Offline HIV, amphetamine, and methamphetamine use were correctly identified using only social media data, with machine learning models obtaining F1 scores of 82.6%, 85.9%, and 85.3%, respectively. Additionally, constructed risk scores were found to be reasonably comparable to risk scores adapted from the Centers for Disease Control and Prevention.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>To our knowledge, our study is the first empirical evaluation of a social media–based public health intervention framework for men who have sex with men. We found that social media data were correlated with offline sexual health and substance use, verified through biological testing. The proof of concept and initial results validate that public health interventions can indeed use social media–based systems to successfully determine offline health risk behaviors. The findings demonstrate the promise of deploying a social media–based just-in-time adaptive intervention to target substance use and HIV risk behavior.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>online social networks</kwd>
        <kwd>machine learning</kwd>
        <kwd>behavioral intervention</kwd>
        <kwd>data mining</kwd>
        <kwd>msm</kwd>
        <kwd>public health</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Men who have sex with men are disproportionately affected by HIV and other sexually transmitted infections. In the United States, men who have sex with men accounted for two-thirds of incident HIV infections and more than half of new syphilis diagnoses in 2018 [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. Substance use has been a persistent driver of the ongoing HIV epidemic in men who have sex with men. Research suggests substance use is highly associated with high-risk sexual behaviors such as condomless anal sex, multiple sex partners, and sex trading for drugs [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>Web-based communication tools such as social networking sites (eg, “hookup apps,” dating websites) have been used among men who have sex with men to seek sexual partners and share information and resources about substance use [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. In the early 2010s, 85% of men who have sex with men used the internet to find sexual partners [<xref ref-type="bibr" rid="ref9">9</xref>], and this figure grew to 96% in 2019 [<xref ref-type="bibr" rid="ref11">11</xref>]. However, the rising popularity of these technologies has also raised concerns about their role in facilitating sexual risk behaviors. Studies [<xref ref-type="bibr" rid="ref12">12</xref>] have shown that men who have sex with men are more likely to engage in condomless anal sex with sex partners met online compared to partners met offline and have demonstrated that men who have sex with men who seek partners online have greater numbers of sexual partners compared to those who do not seek partners online. Furthermore, men who have sex with men who identify sexual partners online have a greater likelihood of substance use [<xref ref-type="bibr" rid="ref13">13</xref>], although the evidence is equivocal [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Further studies are needed to provide empirical evidence of the association between online social networking technologies and offline sexual and substance use behaviors.</p>
      <p>With more than 40% of health care consumers utilizing social media for their health-related decision making, social networks have indeed caught the attention of the public health domain [<xref ref-type="bibr" rid="ref16">16</xref>]. Population-based analyses and in-person interventions are costly, both in time and resources [<xref ref-type="bibr" rid="ref17">17</xref>]. Health spending is projected to grow at an average rate of 5.5% per year, totaling $6.0 trillion by 2027, nearly one-fifth of the United States gross domestic product [<xref ref-type="bibr" rid="ref18">18</xref>]. Given these costs, the opportunity for high user engagement, and accessibility of social media data, social networks provide a new opportunity to public health. In recent years, social media have been employed in behavioral and public health research and have demonstrated their effectiveness in prevention, education, and treatment [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. For instance, analyzing user activity on social media platforms has been an effective way to estimate the risk and time of HIV infection [<xref ref-type="bibr" rid="ref21">21</xref>]. A separate study [<xref ref-type="bibr" rid="ref22">22</xref>] found that the strength of associations in a social network, its network shape, and size are predictors of HIV and sexually transmitted infection risk.</p>
      <p>Public health studies have also begun using social media platforms to understand and intervene in sexual health and substance use risk behaviors among men who have sex with men; however, these methodologies remain nascent in that they still rely on self-report and costly data collection as a means of developing and testing interventions [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Research demonstrates the need to utilize big data in social media and machine learning to understand communication and patterns about substance use and observe and predict real-time risk behaviors [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>]. These strategies can inform just-in-time adaptive interventions [<xref ref-type="bibr" rid="ref29">29</xref>] that are responsive to the individual technology use patterns of research participants; however, before deploying any social media–based intervention, the feasibility and efficacy behind employing such a modality for public health initiatives in risk reduction must be determined.</p>
      <p>In practice, adaptive intervention systems are driven by an ability to determine health risks. Placed in the context of social media data, it is encouraging to learn that assessing health risk using textual sources has shown promising results in disease-specific risk evaluation and in identifying individuals at higher risk of depression and self-harm [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>]. While text collection from electronic health records is extremely effective in determining a diagnosis, it is not a readily available resource for continuous risk assessment. Meanwhile, social media text data have the advantage of being abundantly available and cost-effective. While these data are not as domain constrained as clinical notes, they remain promising channels to explore for risk assessment.</p>
      <p>Additionally, system interventions should be able to accurately evaluate when an individual is about to engage in a targeted health risk behavior with high probability followed by successfully reducing such behaviors. Maher et al [<xref ref-type="bibr" rid="ref33">33</xref>] reviewed the effectiveness of past social network interventions, concluding with a call for stronger evidence in interventions that incorporate online social networks. Our paper responds to this call by evaluating the efficacy of social media data in determining health risk behavior. We are guided by the following questions: (1) Can we further substantiate the association between online social networking technologies and offline sexual and substance use behaviors? (2) Can we extract health risk scores from social media data that align with public health expert evaluation?</p>
      <p>In this paper, the practicality of social media as an intervention modality is evaluated by using social media data to identify health risk behavior in a sample of men who have sex with men from Los Angeles, California. The contributions of this paper are the following: (1) an end-to-end platform that continuously collects data from common social media platforms and specialized social networks tailored to the men who have sex with men community, along with biological data and personal health questionnaires collected in tandem at baseline, 1 month, and 3 months from intake; (2) health behavior risk scores, created using natural language processing techniques, that are comparable to risk scores adapted from the Centers for Disease Control and Prevention (CDC); and (3) the application of machine learning techniques to determine the extent to which social media messages can be used to directly predict verified biological outcomes of substance use and sexual risk, reflected as sexually transmitted disease diagnoses.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Protocol</title>
        <p>The study protocol consisted of 4 milestones: (1) screener, (2) baseline visit, (3) 1-month follow-up, and (4) 3-month follow-up (<xref rid="figure1" ref-type="fig">Figure 1</xref>). In the screener, flyers, online advertisements, and referrals were used to recruit potential participants. Further screening took place over the phone or through a web-based survey. Criteria such as age, sexual orientation, substance use, online dating, and social media activity were used to determine each participant's eligibility for the study.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Different phases of the study protocol and data collection.</p>
          </caption>
          <graphic xlink:href="jmir_v23i4e22042_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Qualified participants were invited to an initial clinical visit to review the study in detail, ask for informed consent, and answer any of their remaining questions. Afterward, a series of lab tests were conducted to determine their substance use and the presence of sexually transmitted diseases. Site testing was conducted for <italic>Chlamydia trachomatis</italic> and <italic>Neisseria gonorrhoeae</italic> with pharyngeal, urethral, and anal swabs. Further tests included a rapid plasma reagin blood test for syphilis, a rapid oral test for HIV, and a urine drug screen. Additionally, a survey was completed by the participants which asked a series of questions regarding demographic characteristics, sexual risk behavior, illicit substance use, and online behavior. Finally, participants provided their log-in credentials for a set of social media sites on which they had been. The user credentials were registered in a custom data collection platform for each website and the participants authorized the data collection system to pull their daily online activity. We collected participant social media data for up to 3 months after onboarding. We found this to be a reasonable duration considering the need to follow participants long enough to observe any changes in social media use and behaviors over time that can be measured by follow-up surveys and their biomarkers.</p>
        <p>The system began collecting participants' daily social media activity immediately after the baseline visit. One month into the study, they were scheduled to revisit the clinic and redo lab tests and surveys. A final follow-up was set for 3 months after the baseline visit to recollect lab and survey data in addition to conducting required off-boarding procedures, including the discontinuation of participant data collection.</p>
        <p>This study protocol was approved by the University of California, Los Angeles institutional review board (IRB 17-000408). Informed consent was obtained from all individual participants included in the study. Each participant was provided with up to US $150 in cash incentives based on their participation. Certain medical conditions identified within this study were also reported to appropriate agencies as required by federal and state laws. Proper consultation and referrals were provided to each participant before and after reporting each screening test.</p>
      </sec>
      <sec>
        <title>System Architecture</title>
        <p>Data from Facebook and Twitter were automatically collected through official application programming interfaces (APIs). The APIs were used to query the content (eg, messages, posts) generated by each participant; however, it is important to note that Facebook changed its API policy during the study. Standard permission requests could no longer be used to access user data. To mitigate this issue and re-enable the permission to read user content, the participants enrolled as testers of a custom Facebook app developed during the study.</p>
        <p>In addition to these major social media platforms, we collected similar data fields for men who have sex with men–specific social networks. The software implementation slightly differed from that of Facebook and Twitter in that a unique web scraper was built to collect data. Due to the study's privacy protocol, the name of each site was omitted. The websites, labeled Website A and Website B, are among the most established websites in the men who have sex with men community, with over 10 million users overall. Qualitative interviews and input from a community advisory board further informed the decision to include these websites. Each scraper used a combination of browsing automation tools and web parsers to mimic a user going online and accessing their profile. URL requests were made using Selenium, a browsing tool that can conduct automatic authentication, search, and navigate each site of interest [<xref ref-type="bibr" rid="ref34">34</xref>]. Participants explicitly provided consent and their usernames and passwords per institutional review board protocol approvals. These credentials were used by Selenium to automate logging into a website and navigating to pages where the system was able to collect profiles and messages between users. The data were extracted from each website of interest using the BeautifulSoup Python Package and later saved to the database [<xref ref-type="bibr" rid="ref35">35</xref>]. <xref rid="figure2" ref-type="fig">Figure 2</xref> provides a visualization of the study's overall database schema. MySQL, a free and open-source database server, was used to store and query user messages and metadata including usernames and respective access tokens from each website. To maintain high data quality, a hashing of the data and timestamp was created within the database to prevent duplication.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>A visualization of the database schema used in this study. MSM: men who have sex with men.</p>
          </caption>
          <graphic xlink:href="jmir_v23i4e22042_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>We validated that the system collected participant messages consistently and reliably by maintaining a group of test users. Before and throughout the study, various mixed-media data were sent across these platforms to make sure all types of generated data were correctly collected and accurately represented. The data consisted of text, videos, and emojis.</p>
        <p>Due to the sensitive nature of collecting online social media activity, providing security and privacy was of paramount importance. Therefore, the data collection system was designed to offer protection on multiple levels: at the system level and the data level. At the system level, the user data were stored on a dedicated private server behind a firewall, protecting against outside cyber-attacks and malicious software. At the data level, the user data went through a data sanitization process. Participants' identifiers in the data collection system were anonymized and could only be matched with a reference datasheet stored externally on a cloud storage service compliant with the Health Insurance Portability and Accountability Act (HIPAA). The cloud storage service was also used to store all other data, such as participant questionnaires and drug screening results.</p>
      </sec>
      <sec>
        <title>Defining Health Behavior Risk</title>
        <p>Given the ambiguous nature of identifying health risk behavior in social media messages, a map between commonly used colloquial terminology and risk behavior topics was curated. To gain a more nuanced understanding of online strategies and behaviors around seeking drugs and sexual partners, 24 men who have sex with men community members in Los Angeles who self-disclosed risky sex, illicit drug use, and usage of dating apps were qualitatively interviewed. The response to the following question became the foundation for the risk behavior dictionary: “What are some of the terms that you use on websites, chatrooms, message boards, and apps to find drugs, drug use partners, and/or sex partners?” The words were organized into topics (<xref ref-type="table" rid="table1">Table 1</xref>). All terms were vetted by a community advisory board throughout the study. The resulting risk behavior dictionary was leveraged to facilitate the modeling of the relationship between social media messages and health behavior risk.</p>
        <p>Sexual health and illicit drug use risk was also evaluated for each participant using the CDC Risk Assessment Tool [<xref ref-type="bibr" rid="ref36">36</xref>]. Each assessment was adjusted based on participants' weekly text message diary responses on illicit substance use. Scores for each participant were available at baseline intake, 1-month, and 3-month checkpoints.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Sexual and substance use topics associated with colloquial terms found in text conversations based on qualitative interviews with men who have sex with men community members.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="600"/>
            <thead>
              <tr valign="top">
                <td>Topic</td>
                <td>Words</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Alcohol</td>
                <td>drunk, drinking, booze, party, liquor</td>
              </tr>
              <tr valign="top">
                <td>Marijuana</td>
                <td>toke, pipe, weed, pot, mary jane</td>
              </tr>
              <tr valign="top">
                <td>Cocaine</td>
                <td>crack, blow, snow, yayo, powder</td>
              </tr>
              <tr valign="top">
                <td>Methamphetamine</td>
                <td>meth, speed, ice, crank, crystal</td>
              </tr>
              <tr valign="top">
                <td>Amyl nitrite</td>
                <td>poppers, rush, pops, amyl</td>
              </tr>
              <tr valign="top">
                <td>Heroin</td>
                <td>dope, smack, junk, tar</td>
              </tr>
              <tr valign="top">
                <td>Ecstasy</td>
                <td>X, Ex, molly, rolling, mdma</td>
              </tr>
              <tr valign="top">
                <td>GHB</td>
                <td>G, roll, water</td>
              </tr>
              <tr valign="top">
                <td>Ketamine</td>
                <td>K</td>
              </tr>
              <tr valign="top">
                <td>Type of substance other/general</td>
                <td>pills, favors, DMT, party, party favors</td>
              </tr>
              <tr valign="top">
                <td>Snorting</td>
                <td>snort, sniff, rack, rail, lines</td>
              </tr>
              <tr valign="top">
                <td>Inhaling</td>
                <td>smoke, blow clouds, hit, puff</td>
              </tr>
              <tr valign="top">
                <td>Injection</td>
                <td>straight to the point, straight to the poinT, slam, slamming, shoot, shooting</td>
              </tr>
              <tr valign="top">
                <td>Anal insertion</td>
                <td>booty bumping, butt rocket, plug, plugging</td>
              </tr>
              <tr valign="top">
                <td>Substance use behavior other/general</td>
                <td>gen, generous, friends with benefits</td>
              </tr>
              <tr valign="top">
                <td>Buying drugs</td>
                <td>Do you have a connect, I can contribute, can you do me a big/little/huge favor</td>
              </tr>
              <tr valign="top">
                <td>Masturbation</td>
                <td>JO, jack, stroke, HJ, jack off</td>
              </tr>
              <tr valign="top">
                <td>Oral sex</td>
                <td>blow, head, gloryhole, suck, BJ</td>
              </tr>
              <tr valign="top">
                <td>Anal intercourse</td>
                <td>top, bottom, fuck, power top, power bottom</td>
              </tr>
              <tr valign="top">
                <td>Group sex</td>
                <td>3some, 3way, gang bang, orgy, bukkake</td>
              </tr>
              <tr valign="top">
                <td>Sex work</td>
                <td>$, roses, generous, pro, GEN</td>
              </tr>
              <tr valign="top">
                <td>Anonymous</td>
                <td>anon, discreet, discrete, anonymous, random play</td>
              </tr>
              <tr valign="top">
                <td>Sex with condoms</td>
                <td>condoms, rubber, safe sex, play safe, safe</td>
              </tr>
              <tr valign="top">
                <td>Condomless sex</td>
                <td>Bareback, bare, raw, seed, seeding</td>
              </tr>
              <tr valign="top">
                <td>Substance use and sex</td>
                <td>Party and play, smoke and stroke, pnp, party, partying</td>
              </tr>
              <tr valign="top">
                <td>Sexual behavior general/other</td>
                <td>69</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Set Processing and Auditing</title>
        <p>Data were cleaned to maximize consistency and accuracy. The messages were first screened using Python regular expressions to pattern match texts identified as spam or in-app advertisements. Personally identifiable information that follows a recognizable pattern, such as addresses and phone numbers, was tokenized. For instance, if a phone number was provided in a message, it was replaced with “phonenumbertoken.” Data were deduplicated and the privacy of those that communicated with our participants was protected. This was done by only considering the participants' sent messages, resulting in data for 48 individuals. Some messages also consisted of notifications such as when a participant's profile was seen, clicked on, or had a request to unlock their photos—all of which were tokenized with the suffix “token.” One data source in the study was excluded because it failed to produce data due to changes in the site's data collection policy. Furthermore, we focused our analysis on biomarkers that were present in at least 10% of participants.</p>
        <p>After data cleaning, the messages went through an automated pattern matching pipeline using regular expressions to identify terms in the risk behavior dictionary defined in <xref ref-type="table" rid="table1">Table 1</xref>. The mapping was utilized to flag words and colloquial terms associated with health risk behaviors in each message. As an example, if the word in a given message was “rail,” it was matched to the topic of snorting.</p>
      </sec>
      <sec>
        <title>Biomarker Prediction With Social Media Messages</title>
        <p>The relationship between participants' social media behavior and illicit substance use and sexual risk behaviors was examined; data collected across social media accounts were leveraged to predict participants’ respective offline substance use and sexual health biomarkers.</p>
        <p>Standardized counts of each tokenized word and text-summary features, such as message length, were used as simple features to predict drug use and sexually transmitted diseases. Each outcome was treated as its own binary classification task. Logistic regression, linear support vector machine, naïve Bayes, and random forest models were employed to predict the outcomes at 1-month and 3-month follow-up. Given the relatively small data set and the challenge of class imbalance, stratified 5-fold cross-validation was used to assess the generalizability of the predictive models. The performance of each model was assessed using the precision, defined as <italic>true positive</italic> / (<italic>true positive + false positive</italic>); recall, defined as <italic>true positive</italic> / (<italic>true positive + false negative</italic>); and F1 score, defined as 2 × <italic>precision</italic> × <italic>recall</italic> / (<italic>precision + recall</italic>).</p>
      </sec>
      <sec>
        <title>Message-Based Risk Scores</title>
        <p>Health risk behavior was assessed on a per-message level using available social media text correspondence. A risk score was given for each message based on how likely its words were associated with those in the risk behavior dictionary.</p>
      </sec>
      <sec>
        <title>Identifying Health Risk Behaviors Using Social Media Messages</title>
        <p>Natural language processing techniques were employed to create a risk score using social media data. We employed a Skip-Gram Word2Vec model, which allowed us to extract word representations to determine the association between words in social media messages and words in the risk behavior dictionary [<xref ref-type="bibr" rid="ref37">37</xref>]. The Skip-Gram Word2Vec model constructs a word representation, or word embedding, based on how well it predicts the words that surround it within a given radius. Given all the words in the message corpus, <italic>w</italic><sub>1</sub>, <italic>w</italic><sub>2</sub>, <italic>w</italic><sub>3</sub>,..., <italic>w</italic><sub>T</sub>, the model tries to predict the probability of observing the context word <italic>w</italic><sub>t+j</sub> given a target word <italic>w</italic><sub>t</sub>.</p>
        <p>
          <disp-formula>
            <graphic xlink:href="jmir_v23i4e22042_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>The resulting model constructs a vector space that identifies context-dependent similarity between words used by the participants in the selected social networks monitored.</p>
      </sec>
      <sec>
        <title>Health Behavior Risk Scoring</title>
        <p>Once contextually similar instances of risky text messages are found, a risk score is constructed with cosine similarity, similar to the approach used by Kiros et al [<xref ref-type="bibr" rid="ref38">38</xref>]. Given user <italic>i</italic>’s <italic>t</italic>th message, the risk score is defined as the average cosine similarity between the word in the message, <inline-graphic xlink:href="jmir_v23i4e22042_fig8.png" xlink:type="simple" mimetype="image"/>, and the risky word <italic>w</italic> from each risk topic <italic>d</italic> set of risky words.</p>
        <p>
          <disp-formula>
            <graphic xlink:href="jmir_v23i4e22042_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>The distance between the words found in messages and those from the risk behavior dictionary was used to decide when a user was displaying a risky textual correspondence. Afterward, similarity between the risk scores generated by the model and those provided by the CDC risk assessment tool was assessed for each participant.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data</title>
        <p>A total of 15,695 sent messages were collected in the 3-month timeline for 48 participants across 4 different platforms: Twitter, Facebook, Website A, and Website B after preprocessing. 6.5% (1026) of the messages were advertisements filtered out by our data processor. As shown in <xref ref-type="table" rid="table2">Table 2</xref>, the majority of messages, 75.7% (11,877), came from Website A, followed by Website B with 21.2% (3327), Facebook at 2.2% (352), and Twitter with 0.89% (139) of messages. Participant activity across all platforms is displayed as a heatmap in <xref rid="figure3" ref-type="fig">Figure 3</xref>, with participants 28, 40, and 42 showing the highest activity after initial onboarding.</p>
        <p>The distribution of each topic is visualized in <xref rid="figure4" ref-type="fig">Figure 4</xref>. The topics with the most discussion across the social media sites were anal intercourse and methamphetamine use. In the messages themselves, tokens such as “clickedprofiletoken,” “unlockedphotostoken,” and “phonenumbertoken” were the highest occurring tokens across all messages. This makes sense, as activity in dating sites is heavily based on interacting with photos and other information exchange, potentially leading up to an offline connection.</p>
        <p>In terms of clinical data, the distribution of each outcome can be seen in <xref ref-type="table" rid="table3">Table 3</xref>, where the majority of selected outcomes suffer from a 10% to 15% imbalance.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Messages sent across social media platforms by participants.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="340"/>
            <col width="330"/>
            <thead>
              <tr valign="top">
                <td>Source</td>
                <td>Messages sent, n (%<sup>a</sup>)</td>
                <td>Cumulative messages sent, n (%<sup>a</sup>)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Twitter</td>
                <td>139 (0.9)</td>
                <td>139 (0.9)</td>
              </tr>
              <tr valign="top">
                <td>Facebook</td>
                <td>352 (2.2)</td>
                <td>491 (3.1)</td>
              </tr>
              <tr valign="top">
                <td>Website<sup>b</sup> A</td>
                <td>3327 (21.2)</td>
                <td>3818 (24.3)</td>
              </tr>
              <tr valign="top">
                <td>Website<sup>b</sup> B</td>
                <td>11,877 (75.7)</td>
                <td>15,695 (100)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Percentage of total messages.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Specific to men who have sex with men.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>A heatmap of user activity across all sources of data during the study.</p>
          </caption>
          <graphic xlink:href="jmir_v23i4e22042_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Topics found in messages using the risk behavior word dictionary provided by public health experts.</p>
          </caption>
          <graphic xlink:href="jmir_v23i4e22042_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Distribution of biomarker outcomes in a 1-month and 3-month follow-up after participants’ initial onboarding.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="280"/>
            <col width="0"/>
            <col width="340"/>
            <col width="0"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td colspan="4">Test and outcome</td>
                <td colspan="2">1-month follow-up (n=48), n (%)</td>
                <td>3-month follow-up (n=48), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Sexually transmitted disease</bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">
                  <bold>HIV<sup>a</sup></bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Positive</td>
                <td colspan="2">20 (41.7)</td>
                <td colspan="2">23 (47.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Negative</td>
                <td colspan="2">28 (58.3)</td>
                <td colspan="2">25 (52.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Substance use</bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">
                  <bold>Amphetamine</bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Positive</td>
                <td colspan="2">24 (50.0)</td>
                <td colspan="2">21 (43.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Negative</td>
                <td colspan="2">24 (50.0)</td>
                <td colspan="2">27 (56.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">
                  <bold>Methamphetamine</bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Positive</td>
                <td colspan="2">25 (52.1)</td>
                <td colspan="2">22 (45.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Negative</td>
                <td colspan="2">23 (47.9)</td>
                <td colspan="2">26 (54.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">
                  <bold>THC<sup>b</sup></bold>
                </td>
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Positive</td>
                <td colspan="2">20 (41.7)</td>
                <td colspan="2">18 (37.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Negative</td>
                <td colspan="2">28 (58.3)</td>
                <td colspan="2">30 (62.5)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>HIV: human immunodeficiency virus.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>THC: tetrahydrocannabinol.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Biomarker Prediction</title>
        <p>Of the 4 biomarkers, only 3 reflect an F1 score greater than 80%—HIV, amphetamine, and methamphetamine (<xref ref-type="table" rid="table4">Table 4</xref>). Although the tetrahydrocannabinol outcome did not suffer from severe class imbalance, one reason that may explain the significantly poorer performance is that the topic was heavily impacted by polysemy. As an example, participants alluded to marijuana usage with words and phrases that had multiple meanings. The phrase “blowing clouds” could refer to smoking marijuana or smoking methamphetamines.</p>
        <p>For these 3 offline biomarker outcomes, the random forest model resulted in the highest F1 scores. Amphetamine usage was the best predicted outcome for all 4 models.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>F1 scores for each respective model and outcome in a 1-month and 3-month follow-up period after participants’ initial onboarding.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="510"/>
            <col width="230"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Outcome and model</td>
                <td colspan="2">F1 Score, %</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>1-month follow-up</td>
                <td>3-month follow-up</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">
                  <bold>HIV<sup>a</sup></bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Support vector machine</td>
                <td>70.5</td>
                <td>81.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td>73.4</td>
                <td>82.6</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Naïve Bayes</td>
                <td>69.6</td>
                <td>82.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Logistic regression</td>
                <td>68.5</td>
                <td>81.4</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Amphetamines</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Support vector machine</td>
                <td>88.2</td>
                <td>85.9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td>88.3</td>
                <td>85.9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Naïve Bayes</td>
                <td>88.0</td>
                <td>85.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Logistic regression</td>
                <td>88.1</td>
                <td>85.8</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Methamphetamines</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Support vector machine</td>
                <td>88.3</td>
                <td>85.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td>88.3</td>
                <td>85.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Naïve Bayes</td>
                <td>88.1</td>
                <td>84.8</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Logistic regression</td>
                <td>88.2</td>
                <td>85.0</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>THC<sup>b</sup></bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Support vector machine</td>
                <td>11.1</td>
                <td>7.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td>5.0</td>
                <td>0.5</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Naïve Bayes</td>
                <td>24.0</td>
                <td>4.7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Logistic regression</td>
                <td>10.9</td>
                <td>9.1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>HIV: human immunodeficiency virus.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>THC: tetrahydrocannabinol.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Associating Words With Health Behavior Risk</title>
        <p>A dimensionality reduction technique, <italic>t</italic>-distributed stochastic neighbor embedding, was used to observe the relationship between word embeddings [<xref ref-type="bibr" rid="ref39">39</xref>]. <xref rid="figure5" ref-type="fig">Figure 5</xref> shows the resulting vector space as words are projected onto a 2D plane. Intuitively, the resulting proximity between words can be interpreted as their similarity. As a result, natural groupings between drug and sex-related words form. For instance, the word “Smurff,” a term used to describe oral sex, is grouped with the term “head,” a known colloquial term for oral sex.</p>
        <p>Health behavior risk scores were calculated using the Word2Vec word vector space. Risk scores were created each day for a given user in addition to their average risk score along with risk scores provided by public health experts. While the constructed daily scores vary across time, there is a visible similarity between the generated risk score and expert scores, on average (<xref rid="figure6" ref-type="fig">Figure 6</xref>).</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>A t-distributed stochastic neighbor embedding visualization of the resulting vector space after running Word2Vec: drug-related words, sex-related words, and randomly chosen words.</p>
          </caption>
          <graphic xlink:href="jmir_v23i4e22042_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Normalized expert risk evaluation for drug and sex at 1 month and 3 months.</p>
          </caption>
          <graphic xlink:href="jmir_v23i4e22042_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>To our knowledge, our study is the first empirical evaluation of a social media–based public health intervention framework for men who have sex with men. Our qualitative work highlighted the ways in which men who have sex with men use coded language online to refer to specific substance use behavior and HIV risk. We investigated the association between social media data and offline health risk behavior by operationalizing social media data across several networking sites. We built a system that automated social media data collection, which allowed us to predict offline substance use and sexual health biomarkers and construct daily health behavior risk scores.</p>
        <p>In conducting an exploratory data analysis to check for data quality, we validated that the system was indeed able to collect participant messages consistently and reliably using the proposed system architecture. It was reasonable to observe that most messages came from men who have sex with men–specific social networks, as individuals may be more reserved around topics related to health risk behaviors over platforms with a larger audience such as Facebook and Twitter. Furthermore, we observed that after the topic of anal intercourse, the topic of methamphetamines was the most frequent in risk behavior conversation (<xref rid="figure4" ref-type="fig">Figure 4</xref>).</p>
        <p>We tested the extent to which social media data could provide meaningful insight into health risk behaviors by predicting offline sexual health and substance use biomarkers. We found that across the models and clinical timelines, there was a consistently high F1 score when predicting HIV, amphetamine, and methamphetamine use. These results are validated by the fact that methamphetamines are one of the most commonly used drugs among men who have sex with men [<xref ref-type="bibr" rid="ref5">5</xref>]. Moreover, these findings align with existing literature pointing to the association between methamphetamine use and increased risk for HIV [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Most importantly, we validate the significance of social media data in its association with substance use and sexual health biomarker outcomes.</p>
        <p>Daily health risk scores were created for participants using only social media data. The method was validated by the observation that the constructed risk scores, on average, were comparable to adapted risk scores created by the CDC risk assessment tool. The results were promising at a fundamental level, as we observed how words in social media messages can indeed cluster with known drug and sex-related words when mapped to a vector space (<xref rid="figure5" ref-type="fig">Figure 5</xref>). In constructing an average risk score for participants, we used their text messages and respective contexts to extract risk; we assumed that the public health risk dictionary was most accurate and encompassing of health risk behavior terminology. The health behavior risk scores were constructed on a daily level. Only social media data were used to construct the risk score, while public health experts used biologically verified results and self-reported data to construct monthly risk scores by adapting a version of the CDC risk assessment tool. Yet, even with this significant difference in methodology, the average risk was still quite similar to the expertly assessed risk. This suggests that the community-based participatory approach used to create the data dictionary was crucial to the risk score creation process. Hence, the success of using such an approach heavily relies on involving end users in the creation of social media tools.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The technical setup and maintenance needed for a social media data collection platform is an important consideration for scalability. Extensive effort and resources were required to achieve HIPAA compliance for our system. Additionally, creating the data collector required custom handling of each data source. For instance, Facebook's policy change called for a completely different approach to data collecting in the middle of the study. To provide a scalable solution, the data collection platform should be flexible enough to adjust to a new collection regime due to circumstances outside of the study's control.</p>
        <p>Several outcomes did not pass our exclusion criteria filter at the beginning of the study due to a significant imbalance in their respective distributions, leading to challenges for biomarker prediction. Additionally, employing methods that handle class imbalance may improve our current F1 scores. At a higher level, it is also important to note that the scope of this paper is somewhat limited since we only collected data from a few websites, which may not be used by the entire community of men who have sex with men.</p>
      </sec>
      <sec>
        <title>Implications</title>
        <p>Self-report is a common form of feedback for health behavior interventions. While useful, it is often flawed because of biases such as recall bias and social desirability bias. Therefore, the health behavior data in this study were combined with biologically verified data to calculate risk. We found that social media may serve as a valid intervention modality as it provides both valuable and relevant feedback. Overall, we determined that combining self-report data with biologically verified outcomes and social media data mining gives a more nuanced and accurate picture of health behavior risk. These findings suggest that it is feasible to use social media data for future public health intervention.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we created an end-to-end system that leverages social media data for health behavior risk identification, serving as a proof of concept for social media–based behavioral intervention. We demonstrated that it is possible to build an integrative system across multiple platforms that effectively collects meaningful social media data. We determined that social media messages are a valuable source for examining the relationship between health risk behaviors and biologically verified sexual diseases such as HIV and illicit usage of amphetamines and methamphetamines among men who have sex with men. Adapted CDC health risk scores were compared against social media–based behavioral risk scores and found to be, on average, similar to the expertly assessed scores. This validates the feasibility of employing a social media–based behavioral intervention. The contributions made in this paper are stepping stones toward building an automated, cost-effective, fully scalable social media intervention system that serves the public health domain.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">HIPAA</term>
          <def>
            <p>Health Insurance Portability and Accountability Act</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">HIV</term>
          <def>
            <p>human immunodeficiency virus</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We thank all the participants of this study for their time and cooperation. We are grateful to our collaborators and researchers. We would like to thank Christian Paneda, Omkar Patil, Tom Weng, and Amrutha Srinivasan for helping to maintain the data collection system. IWH and ESCW are supported by the California HIV/AIDS Research Program (RP15-LA-007) and the University of California, Los Angeles Center for HIV Identification, Prevention, and Treatment Services (P30 MH058107). This study was also partially supported by the National Institute on Drug Abuse (R03 DA039752).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>2018 STD surveillance report</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2019</year>
          <month>11</month>
          <day>12</day>
          <access-date>2020-11-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/nchhstp/newsroom/2019/2018-STD-surveillance-report.html">https://www.cdc.gov/nchhstp/newsroom/2019/2018-STD-surveillance-report.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>HIV surveillance reports</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2020-11-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/hiv/library/reports/hiv-surveillance.html">https://www.cdc.gov/hiv/library/reports/hiv-surveillance.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Purcell</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Lansky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Prejean</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Denning</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gau</surname>
              <given-names>Zaneta</given-names>
            </name>
            <name name-style="western">
              <surname>Weinstock</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Crepaz</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Estimating the population size of men who have sex with men in the United States to obtain HIV and syphilis rates</article-title>
          <source>Open AIDS J</source>
          <year>2012</year>
          <month>9</month>
          <day>7</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>98</fpage>
          <lpage>107</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23049658"/>
          </comment>
          <pub-id pub-id-type="doi">10.2174/1874613601206010098</pub-id>
          <pub-id pub-id-type="medline">23049658</pub-id>
          <pub-id pub-id-type="pii">TOAIDJ-6-98</pub-id>
          <pub-id pub-id-type="pmcid">PMC3462414</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buchacz</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>McFarland</surname>
              <given-names>Willi</given-names>
            </name>
            <name name-style="western">
              <surname>Kellogg</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Loeb</surname>
              <given-names>Lisa</given-names>
            </name>
            <name name-style="western">
              <surname>Holmberg</surname>
              <given-names>Scott D</given-names>
            </name>
            <name name-style="western">
              <surname>Dilley</surname>
              <given-names>James</given-names>
            </name>
            <name name-style="western">
              <surname>Klausner</surname>
              <given-names>Jeffrey D</given-names>
            </name>
          </person-group>
          <article-title>Amphetamine use is associated with increased HIV incidence among men who have sex with men in San Francisco</article-title>
          <source>AIDS</source>
          <year>2005</year>
          <month>09</month>
          <day>02</day>
          <volume>19</volume>
          <issue>13</issue>
          <fpage>1423</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1097/01.aids.0000180794.27896.fb</pub-id>
          <pub-id pub-id-type="medline">16103774</pub-id>
          <pub-id pub-id-type="pii">00002030-200509020-00011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hoenigl</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chaillon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Little</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Clear links between starting methamphetamine and increasing sexual risk behavior</article-title>
          <source>J AIDS</source>
          <year>2016</year>
          <volume>71</volume>
          <issue>5</issue>
          <fpage>551</fpage>
          <lpage>557</lpage>
          <pub-id pub-id-type="doi">10.1097/qai.0000000000000888</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Weatherburn</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Links between transactional sex and HIV/STI-risk and substance use among a large sample of European men who have sex with men</article-title>
          <source>BMC Infect Dis</source>
          <year>2019</year>
          <month>08</month>
          <day>05</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>686</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcinfectdis.biomedcentral.com/articles/10.1186/s12879-019-4326-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12879-019-4326-3</pub-id>
          <pub-id pub-id-type="medline">31382923</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12879-019-4326-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6683343</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grov</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Breslow</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Newcomb</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenberger</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Bauermeister</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Gay and bisexual men's use of the internet: research from the 1990s through 2013</article-title>
          <source>J Sex Res</source>
          <year>2014</year>
          <volume>51</volume>
          <issue>4</issue>
          <fpage>390</fpage>
          <lpage>409</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24754360"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/00224499.2013.871626</pub-id>
          <pub-id pub-id-type="medline">24754360</pub-id>
          <pub-id pub-id-type="pmcid">PMC4154140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bien</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Best</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Muessig</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Gay apps for seeking sex partners in China: implications for MSM sexual health</article-title>
          <source>AIDS Behav</source>
          <year>2015</year>
          <month>06</month>
          <day>9</day>
          <volume>19</volume>
          <issue>6</issue>
          <fpage>941</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25572834"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10461-014-0994-6</pub-id>
          <pub-id pub-id-type="medline">25572834</pub-id>
          <pub-id pub-id-type="pmcid">PMC4475493</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosser</surname>
              <given-names>BRS</given-names>
            </name>
            <name name-style="western">
              <surname>Wilkerson</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Smolenski</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Oakes</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Konstan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Horvath</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kilian</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Novak</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Danilenko</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The future of Internet-based HIV prevention: a report on key findings from the Men's INTernet (MINTS-I, II) Sex Studies</article-title>
          <source>AIDS Behav</source>
          <year>2011</year>
          <month>04</month>
          <day>1</day>
          <volume>15 Suppl 1</volume>
          <issue>S1</issue>
          <fpage>S91</fpage>
          <lpage>100</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21360127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10461-011-9910-5</pub-id>
          <pub-id pub-id-type="medline">21360127</pub-id>
          <pub-id pub-id-type="pmcid">PMC3345812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bourne</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Reid</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hickson</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Torres-Rueda</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weatherburn</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Illicit drug use in sexual settings ('chemsex') and HIV/STI transmission risk behaviour among gay men in South London: findings from a qualitative study</article-title>
          <source>Sex Transm Infect</source>
          <year>2015</year>
          <month>12</month>
          <day>09</day>
          <volume>91</volume>
          <issue>8</issue>
          <fpage>564</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1136/sextrans-2015-052052</pub-id>
          <pub-id pub-id-type="medline">26163510</pub-id>
          <pub-id pub-id-type="pii">sextrans-2015-052052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shrestha</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Altice</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Copenhaver</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wickersham</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Saifi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ab Halim</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Naning</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kamarulzaman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Use of smartphone to seek sexual health information online among Malaysian men who have sex with men (MSM): implications for mHealth intervention to increase HIV testing and reduce HIV risks</article-title>
          <source>J Community Health</source>
          <year>2020</year>
          <month>02</month>
          <day>2</day>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>10</fpage>
          <lpage>19</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31375976"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10900-019-00713-x</pub-id>
          <pub-id pub-id-type="medline">31375976</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10900-019-00713-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC6957731</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grosskopf</surname>
              <given-names>NA</given-names>
            </name>
            <name name-style="western">
              <surname>LeVasseur</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Glaser</surname>
              <given-names>DB</given-names>
            </name>
          </person-group>
          <article-title>Use of the Internet and mobile-based "apps" for sex-seeking among men who have sex with men in New York City</article-title>
          <source>Am J Mens Health</source>
          <year>2014</year>
          <month>11</month>
          <day>20</day>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>510</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1557988314527311?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1557988314527311</pub-id>
          <pub-id pub-id-type="medline">24658284</pub-id>
          <pub-id pub-id-type="pii">1557988314527311</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Aynalem</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>LV</given-names>
            </name>
            <name name-style="western">
              <surname>Montoya</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kerndt</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Methamphetamine use and sexual risk behaviours among men who have sex with men diagnosed with early syphilis in Los Angeles County</article-title>
          <source>Int J STD AIDS</source>
          <year>2007</year>
          <month>02</month>
          <day>25</day>
          <volume>18</volume>
          <issue>2</issue>
          <fpage>93</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17331279"/>
          </comment>
          <pub-id pub-id-type="doi">10.1258/095646207779949709</pub-id>
          <pub-id pub-id-type="medline">17331279</pub-id>
          <pub-id pub-id-type="pmcid">PMC6783809</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liau</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Millett</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Meta-analytic examination of online sex-seeking and sexual risk behavior among men who have sex with men</article-title>
          <source>Sex Transm Dis</source>
          <year>2006</year>
          <month>09</month>
          <volume>33</volume>
          <issue>9</issue>
          <fpage>576</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1097/01.olq.0000204710.35332.c5</pub-id>
          <pub-id pub-id-type="medline">16540884</pub-id>
          <pub-id pub-id-type="pii">00007435-900000000-00007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hiller</surname>
              <given-names>JE</given-names>
            </name>
          </person-group>
          <article-title>Differences between internet and community samples of MSM: implications for behavioral surveillance among MSM in China</article-title>
          <source>AIDS Care</source>
          <year>2008</year>
          <month>10</month>
          <day>29</day>
          <volume>20</volume>
          <issue>9</issue>
          <fpage>1128</fpage>
          <lpage>37</lpage>
          <pub-id pub-id-type="doi">10.1080/09540120701842829</pub-id>
          <pub-id pub-id-type="medline">18825519</pub-id>
          <pub-id pub-id-type="pii">903106551</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Warden</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <source>ReferralMD</source>
          <access-date>2020-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://getreferralmd.com/2017/01/30-facts-statistics-on-social-media-and-healthcare/">https://getreferralmd.com/2017/01/30-facts-statistics-on-social-media-and-healthcare/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pagoto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Waring</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>May</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>EY</given-names>
            </name>
            <name name-style="western">
              <surname>Kunz</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Hayes</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Oleski</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Adapting behavioral interventions for social media delivery</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <month>01</month>
          <day>29</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>e24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2016/1/e24/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5086</pub-id>
          <pub-id pub-id-type="medline">26825969</pub-id>
          <pub-id pub-id-type="pii">v18i1e24</pub-id>
          <pub-id pub-id-type="pmcid">PMC4752690</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <article-title>Health care costs 101: spending keeps growing</article-title>
          <source>California Health Care Foundation</source>
          <year>2020</year>
          <access-date>2019-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.chcf.org/wp-content/uploads/2019/05/HealthCareCostsAlmanac2019.pdf">https://www.chcf.org/wp-content/uploads/2019/05/HealthCareCostsAlmanac2019.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lemley</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Klausner</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Stafylis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mulatya</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Oden</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Revoredo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shmueli-Blumberg</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hichborn</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McKelle</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Moran</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobs</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Marsch</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>Comparing web-based platforms for promoting HIV self-testing and pre-exposure prophylaxis uptake in high-risk men who have sex with men: protocol for a longitudinal cohort study</article-title>
          <source>JMIR Res Protoc</source>
          <year>2020</year>
          <month>10</month>
          <day>19</day>
          <volume>9</volume>
          <issue>10</issue>
          <fpage>e20417</fpage>
          <lpage>e20417</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchprotocols.org/2020/10/e20417/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20417</pub-id>
          <pub-id pub-id-type="medline">33074164</pub-id>
          <pub-id pub-id-type="pii">v9i10e20417</pub-id>
          <pub-id pub-id-type="pmcid">PMC7605984</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rhodes</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>TP</given-names>
            </name>
            <name name-style="western">
              <surname>Tanner</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Stowers</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bachmann</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>MW</given-names>
            </name>
          </person-group>
          <article-title>Using social media to increase HIV testing among gay and bisexual men, other men who have sex with men, and transgender persons: outcomes from a randomized community trial</article-title>
          <source>Clin Infect Dis</source>
          <year>2016</year>
          <month>06</month>
          <day>01</day>
          <volume>62</volume>
          <issue>11</issue>
          <fpage>1450</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1093/cid/ciw127</pub-id>
          <pub-id pub-id-type="medline">26980878</pub-id>
          <pub-id pub-id-type="pii">ciw127</pub-id>
          <pub-id pub-id-type="pmcid">PMC4872288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Christley</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pinchbeck</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bowers</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Clancy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>French</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Infection in social networks: using network analysis to identify high-risk individuals</article-title>
          <source>Am J Epidemiol</source>
          <year>2005</year>
          <month>11</month>
          <day>15</day>
          <volume>162</volume>
          <issue>10</issue>
          <fpage>1024</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1093/aje/kwi308</pub-id>
          <pub-id pub-id-type="medline">16177140</pub-id>
          <pub-id pub-id-type="pii">kwi308</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Drumright</surname>
              <given-names>LN</given-names>
            </name>
            <name name-style="western">
              <surname>Frost</surname>
              <given-names>SDW</given-names>
            </name>
          </person-group>
          <article-title>Rapid social network assessment for predicting HIV and STI risk among men attending bars and clubs in San Diego, California</article-title>
          <source>Sex Transm Infect</source>
          <year>2010</year>
          <month>12</month>
          <day>21</day>
          <volume>86</volume>
          <issue>Suppl 3</issue>
          <fpage>iii17</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1136/sti.2010.045914</pub-id>
          <pub-id pub-id-type="medline">20966457</pub-id>
          <pub-id pub-id-type="pii">sti.2010.045914</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holloway</surname>
              <given-names>IW</given-names>
            </name>
            <name name-style="western">
              <surname>Pulsipher</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Barman-Adhikari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rice</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Network influences on the sexual risk behaviors of gay, bisexual and other men who have sex with men using geosocial networking applications</article-title>
          <source>AIDS Behav</source>
          <year>2015</year>
          <month>06</month>
          <day>09</day>
          <volume>19</volume>
          <issue>S2</issue>
          <fpage>112</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25572832"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10461-014-0989-3</pub-id>
          <pub-id pub-id-type="medline">25572832</pub-id>
          <pub-id pub-id-type="pmcid">PMC5060094</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holloway</surname>
              <given-names>IW</given-names>
            </name>
          </person-group>
          <article-title>Substance use homophily among geosocial networking application using gay, bisexual, and other men who have sex with men</article-title>
          <source>Arch Sex Behav</source>
          <year>2015</year>
          <month>10</month>
          <volume>44</volume>
          <issue>7</issue>
          <fpage>1799</fpage>
          <lpage>811</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26216146"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10508-015-0581-6</pub-id>
          <pub-id pub-id-type="medline">26216146</pub-id>
          <pub-id pub-id-type="pmcid">PMC4574511</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The use of geosocial networking smartphone applications and the risk of sexually transmitted infections among men who have sex with men: a systematic review and meta-analysis</article-title>
          <source>BMC Public Health</source>
          <year>2018</year>
          <month>10</month>
          <day>16</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>1178</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-018-6092-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-018-6092-3</pub-id>
          <pub-id pub-id-type="medline">30326887</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-018-6092-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6192100</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Centola</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>An experimental study of homophily in the adoption of health behavior</article-title>
          <source>Science</source>
          <year>2011</year>
          <month>12</month>
          <day>02</day>
          <volume>334</volume>
          <issue>6060</issue>
          <fpage>1269</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.sciencemag.org/cgi/pmidlookup?view=long&#38;pmid=22144624"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/science.1207055</pub-id>
          <pub-id pub-id-type="medline">22144624</pub-id>
          <pub-id pub-id-type="pii">334/6060/1269</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eichstaedt</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Crutchley</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Preoţiuc-Pietro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Asch</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
          </person-group>
          <article-title>Facebook language predicts depression in medical records</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2018</year>
          <month>10</month>
          <day>30</day>
          <volume>115</volume>
          <issue>44</issue>
          <fpage>11203</fpage>
          <lpage>11208</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.pnas.org/cgi/pmidlookup?view=long&#38;pmid=30322910"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1802331115</pub-id>
          <pub-id pub-id-type="medline">30322910</pub-id>
          <pub-id pub-id-type="pii">1802331115</pub-id>
          <pub-id pub-id-type="pmcid">PMC6217418</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kalyanam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Katsuki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lanckriet</surname>
              <given-names>GRG</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
          </person-group>
          <article-title>Exploring trends of nonmedical use of prescription drugs and polydrug abuse in the Twittersphere using unsupervised machine learning</article-title>
          <source>Addict Behav</source>
          <year>2017</year>
          <month>02</month>
          <volume>65</volume>
          <fpage>289</fpage>
          <lpage>295</lpage>
          <pub-id pub-id-type="doi">10.1016/j.addbeh.2016.08.019</pub-id>
          <pub-id pub-id-type="medline">27568339</pub-id>
          <pub-id pub-id-type="pii">S0306-4603(16)30299-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Platteau</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Herrijgers</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>de Wit</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Digital chemsex support and care: the potential of just-in-time adaptive interventions</article-title>
          <source>Int J Drug Policy</source>
          <year>2020</year>
          <month>11</month>
          <volume>85</volume>
          <fpage>102927</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0955-3959(20)30266-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.drugpo.2020.102927</pub-id>
          <pub-id pub-id-type="medline">32932125</pub-id>
          <pub-id pub-id-type="pii">S0955-3959(20)30266-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feller</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zucker</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Using clinical notes and natural language processing for automated HIV risk assessment</article-title>
          <source>J Acquir Immune Defic Syndr</source>
          <year>2018</year>
          <volume>77</volume>
          <issue>2</issue>
          <fpage>160</fpage>
          <lpage>166</lpage>
          <pub-id pub-id-type="doi">10.1097/qai.0000000000001580</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Korhonen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ó Séaghdha</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Silins</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Högberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stenius</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Text mining for literature review and knowledge discovery in cancer risk assessment and research</article-title>
          <source>PLoS One</source>
          <year>2012</year>
          <month>04</month>
          <day>12</day>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>e33427</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0033427"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0033427</pub-id>
          <pub-id pub-id-type="medline">22511921</pub-id>
          <pub-id pub-id-type="pii">PONE-D-11-23667</pub-id>
          <pub-id pub-id-type="pmcid">PMC3325219</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yates</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goharian</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Depression and self-harm risk assessment in online forums</article-title>
          <source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2017</year>
          <conf-name>Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>September 7-11</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d17-1322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maher</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrar</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>De Bourdeaudhuij</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Vandelanotte</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Are health behavior change interventions that use online social networks effective? A systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>02</month>
          <day>14</day>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>e40</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/2/e40/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.2952</pub-id>
          <pub-id pub-id-type="medline">24550083</pub-id>
          <pub-id pub-id-type="pii">v16i2e40</pub-id>
          <pub-id pub-id-type="pmcid">PMC3936265</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <article-title>Selenium</article-title>
          <source>GitHub</source>
          <access-date>2019-12-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/SeleniumHQ/selenium">https://github.com/SeleniumHQ/selenium</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Beautiful Soup documentation</article-title>
          <source>Crummy</source>
          <year>2007</year>
          <access-date>2019-12-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">https://www.crummy.com/software/BeautifulSoup/bs4/doc/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <article-title>HIV risk reduction tool</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2020-06-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/hivrisk/estimator.html">https://www.cdc.gov/hivrisk/estimator.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>Proceedings of Advances in Neural Information Processing Systems</source>
          <year>2013</year>
          <conf-name>Neural Information Processing Systems</conf-name>
          <conf-date>December 5-8</conf-date>
          <conf-loc>Lake Tahoe, Nevada</conf-loc>
          <fpage>3111</fpage>
          <lpage>3119</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kiros</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zemel</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Unifying visual-semantic embeddings with multimodal neural language models</article-title>
          <year>2014</year>
          <conf-name>31st International Conference on Machine Learning</conf-name>
          <conf-date>June 21-26</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <fpage>595</fpage>
          <lpage>603</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Maaten</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Visualizing data using t-SNE</article-title>
          <source>J Mach Learn Res</source>
          <year>2008</year>
          <month>11</month>
          <fpage>2579</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
