<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e57885</article-id>
      <article-id pub-id-type="pmid">39178036</article-id>
      <article-id pub-id-type="doi">10.2196/57885</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Digital Epidemiology of Prescription Drug References on X (Formerly Twitter): Neural Network Topic Modeling and Sentiment Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Matsuda</surname>
            <given-names>Shinichi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wani</surname>
            <given-names>Aasim Ayaz</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Rao</surname>
            <given-names>Varun K</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-7704-1974</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Valdez</surname>
            <given-names>Danny</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2355-9881</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Muralidharan</surname>
            <given-names>Rasika</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-0362-4890</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Agley</surname>
            <given-names>Jon</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2345-8850</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Eddens</surname>
            <given-names>Kate S</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7783-4156</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Dendukuri</surname>
            <given-names>Aravind</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7826-7114</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Panth</surname>
            <given-names>Vandana</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-3298-1889</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Parker</surname>
            <given-names>Maria A</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Applied Health Science</institution>
            <institution>School of Public Health Bloomington</institution>
            <institution>Indiana University Bloomington</institution>
            <addr-line>809 E. 9th St.</addr-line>
            <addr-line>Bloomington, IN, 47405</addr-line>
            <country>United States</country>
            <phone>1 812 856 5950</phone>
            <email>map2@iu.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9763-1129</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Epidemiology &amp; Biostatistics</institution>
        <institution>School of Public Health Bloomington</institution>
        <institution>Indiana University Bloomington</institution>
        <addr-line>Bloomington, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Applied Health Science</institution>
        <institution>School of Public Health Bloomington</institution>
        <institution>Indiana University Bloomington</institution>
        <addr-line>Bloomington, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Luddy School of Informatics, Computing and Engineering</institution>
        <institution>Indiana University Bloomington</institution>
        <addr-line>Bloomington, IN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Maria A Parker <email>map2@iu.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>23</day>
        <month>8</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e57885</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>2</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>18</day>
          <month>5</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>12</day>
          <month>6</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>7</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Varun K Rao, Danny Valdez, Rasika Muralidharan, Jon Agley, Kate S Eddens, Aravind Dendukuri, Vandana Panth, Maria A Parker. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 23.08.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e57885" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Data from the social media platform X (formerly Twitter) can provide insights into the types of language that are used when discussing drug use. In past research using latent Dirichlet allocation (LDA), we found that tweets containing “street names” of prescription drugs were difficult to classify due to the similarity to other colloquialisms and lack of clarity over how the terms were used. Conversely, “brand name” references were more amenable to machine-driven categorization.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study sought to use next-generation techniques (beyond LDA) from natural language processing to reprocess X data and automatically cluster groups of tweets into topics to differentiate between street- and brand-name data sets. We also aimed to analyze the differences in emotional valence between the 2 data sets to study the relationship between engagement on social media and sentiment.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used the Twitter application programming interface to collect tweets that contained the street and brand name of a prescription drug within the tweet. Using BERTopic in combination with Uniform Manifold Approximation and Projection and k-means, we generated topics for the street-name corpus (n=170,618) and brand-name corpus (n=245,145). Valence Aware Dictionary and Sentiment Reasoner (VADER) scores were used to classify whether tweets within the topics had positive, negative, or neutral sentiments. Two different logistic regression classifiers were used to predict the sentiment label within each corpus. The first model used a tweet’s engagement metrics and topic ID to predict the label, while the second model used those features in addition to the top 5000 tweets with the largest term-frequency–inverse document frequency score.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Using BERTopic, we identified 40 topics for the street-name data set and 5 topics for the brand-name data set, which we generalized into 8 and 5 topics of discussion, respectively. Four of the general themes of discussion in the brand-name corpus referenced drug use, while 2 themes of discussion in the street-name corpus referenced drug use. From the VADER scores, we found that both corpora were inclined toward positive sentiment. Adding the vectorized tweet text increased the accuracy of our models by around 40% compared with the models that did not incorporate the tweet text in both corpora.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>BERTopic was able to classify tweets well. As with LDA, the discussion using brand names was more similar between tweets than the discussion using street names. VADER scores could only be logically applied to the brand-name corpus because of the high prevalence of non–drug-related topics in the street-name data. Brand-name tweets either discussed drugs positively or negatively, with few posts having a neutral emotionality. From our machine learning models, engagement alone was not enough to predict the sentiment label; the added context from the tweets was needed to understand the emotionality of a tweet.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>digital epidemiology</kwd>
        <kwd>BERTopic</kwd>
        <kwd>Valence Aware Dictionary and Sentiment Reasoner</kwd>
        <kwd>VADER</kwd>
        <kwd>sentiment analysis</kwd>
        <kwd>social media</kwd>
        <kwd>prescription drugs</kwd>
        <kwd>prescription</kwd>
        <kwd>prescriptions</kwd>
        <kwd>drug</kwd>
        <kwd>drugs</kwd>
        <kwd>drug use</kwd>
        <kwd>platform X</kwd>
        <kwd>Twitter</kwd>
        <kwd>tweet</kwd>
        <kwd>tweets</kwd>
        <kwd>latent Dirichlet allocation</kwd>
        <kwd>machine-driven</kwd>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
        <kwd>brand name</kwd>
        <kwd>logistic regression</kwd>
        <kwd>machine learning</kwd>
        <kwd>health informatics</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Current State of Social Media for Public Health Surveillance</title>
        <p>Social networking websites such as X (formerly Twitter), Facebook, and Instagram are often described as “digital town squares” [<xref ref-type="bibr" rid="ref1">1</xref>], where people can openly and freely have conversations and discussions about nearly any topic or issue, including those that may not be legal, ethical, or socially acceptable. The broad use and open nature of these conversations have led researchers to use social media to monitor and surveil real-world issues pertaining to public health [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. For example, previous studies have analyzed social media data to develop a real-time influenza surveillance dashboard [<xref ref-type="bibr" rid="ref6">6</xref>]; monitor the language associated with stress, loneliness, and anxiety during the early months of the US COVID-19 outbreak [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]; and track public responses to critical news cycles [<xref ref-type="bibr" rid="ref9">9</xref>], including recent shifts in US abortion legality [<xref ref-type="bibr" rid="ref10">10</xref>]. These types of projects focus on extrapolating “real-world” data (such as prevalence rates of influenza or anxiety) from social media discourse. Importantly, numerous practical, analytic, and ethical issues remain to be studied and addressed around the use of social media data for projects that have the potential to directly or immediately impact public or personal health [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>There is a subtle distinction between work seeking to estimate health-related factors such as disease prevalence rates from social media (ie, as described in the previous paragraph) and research specifically focused on understanding public conversations and discourse on social media. There are likely still biases inherent in such analyses (eg, nonindependence of data, platforms’ algorithmic drivers of conversation, and trending topics) [<xref ref-type="bibr" rid="ref3">3</xref>]. At the same time, analysis of discourse does not attempt to extrapolate secondary or tertiary data points outside of the dialogue itself. Instead, it approaches social media as a lens through which we can view naturally occurring conversations to provide insights about the “state of discourse” in the population of social media users. Such conversations have been studied around a diverse multitude of topics, such as national parks in South Africa [<xref ref-type="bibr" rid="ref12">12</xref>], healthy diets [<xref ref-type="bibr" rid="ref13">13</xref>], COVID-19 vaccines [<xref ref-type="bibr" rid="ref5">5</xref>], and mental health during Mental Health Awareness Week [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      </sec>
      <sec>
        <title>Whose Conversations and Discourse Can Readily Be Studied?</title>
        <p>A substantial majority of US persons aged 18 to 64 years use social media, as do nearly half of those aged ≥65 years [<xref ref-type="bibr" rid="ref15">15</xref>]. Therefore, large-scale analyses of posts on social media can be used to infer how the general population might feel about specific issues (though with caution, as noted in the previous paragraph [<xref ref-type="bibr" rid="ref3">3</xref>]). One important caveat, though, is that different platforms have different user demographics, and people use different platforms at various rates [<xref ref-type="bibr" rid="ref16">16</xref>]. For instance, data from several years ago suggest that users of Facebook tend to be older (aged ≥50 years), while users of X (Twitter) tend to be younger (aged 18-29 years) [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. These social media dynamics, which may change over time, can influence the types of content that users post and view.</p>
        <p>According to a Pew survey on teenage social media use [<xref ref-type="bibr" rid="ref18">18</xref>] in 2022, about 54% of all teens reported that it would be difficult to give up social media, and among teens who view social media use positively, 46% of teens reported that the main reason they use social media is to connect and socialize with others [<xref ref-type="bibr" rid="ref19">19</xref>]. Previously, we noted that X (Twitter) is primarily used by younger populations. On X (Twitter), individuals can not only connect with one another but can also become part of web-based communities that discuss diverse topics.</p>
      </sec>
      <sec>
        <title>Learning About Drug Use From Social Media Discourse</title>
        <p>The United States is in the midst of a drug overdose epidemic that, in recent years, has claimed &gt;100,000 lives every 12 months [<xref ref-type="bibr" rid="ref20">20</xref>]. While major strides have been made in attenuating the harm from this crisis, such as increasing access to naloxone [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>] and harm-reduction strategies [<xref ref-type="bibr" rid="ref23">23</xref>], the persistence of harmful outcomes associated with drug use suggests that additional strategies and information are needed.</p>
        <p>Typically, information about drug use is obtained from investigator-directed research studies [<xref ref-type="bibr" rid="ref24">24</xref>] (eg, surveys and interviews), and such studies contribute meaningfully to this domain of knowledge. At the same time, such mechanisms rely on researchers’ presuppositions about what questions to ask and what topics are important. In contrast, large-scale analyses of social conversations have the potential to elucidate aspects of drug use about which scholars are unaware, or less aware, but that may be important to facilitating harm reduction. This exploratory work can theoretically be used to identify new research strategies, approaches, and theories around drug use that are grounded in inductive analysis of discourse rather than deduced from existing theoretical frameworks. For example, understanding these informal communities can help public health officials better understand real drug use trends that they might see among younger populations. Similarly, learning about the emotional valence of the discussion of specific substances might help inform context-targeted communication strategies.</p>
        <p>On the basis of user demographics, when collecting data in 2022, we perceived that X (Twitter) would be a useful source of observational data to understand how young people feel about several types of drugs as well as about drug use more generally. Until recently, X (Twitter) was the social media platform of choice for researchers in this space due to the large amount of short-form textual data available on the platform through its application programming interface (API) [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. While recent changes to the API have made research on the platform substantively more prohibitive, recent data sets collected before this change still offer excellent utility. This study used such a data set to better understand the themes, sentiment, and engagement levels for drug-related social media conversations. Principles identified through this work will have utility for natural language processing (NLP) analyses across multiple social media platforms.</p>
      </sec>
      <sec>
        <title>Literature Review</title>
        <p>Specific to drug use and outcomes, researchers have used data from X (Twitter) to identify adverse drug reactions using methods from machine learning [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>], monitor population-level opioid abuse in real time [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>], study user sentiment about specific types of drugs [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], and characterize how young people feel about certain drugs like cannabis and drug use more generally [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. A study by Meng et al [<xref ref-type="bibr" rid="ref37">37</xref>] using data from X (Twitter) found that the types of drugs people used varied by demographic characteristics as well as geographic characteristics. The authors collected 79.8 million tweets and analyzed 699,757 tweets that were related to drug use to find associations between the sentiment recorded in sentiment-related tweets and zip codes by analyzing which drugs were tweeted about the most often using hashtags. Another study by Stevens et al [<xref ref-type="bibr" rid="ref36">36</xref>] identified which drugs are discussed by younger populations and identified specific themes indicating how young people discuss drug use on social media. Both studies sampled a large amount of data but qualitatively coded a subset of their data set. Taken together, these studies suggest that using social media as a barometer to understand public sentiment may be a fast way to ascertain public sentiment without having to use advanced surveying methodology while avoiding certain implicit assumptions that might be made in such surveys.</p>
        <p>Our research builds on these previous studies using a similar-sized X (Twitter) data set to Meng et al [<xref ref-type="bibr" rid="ref37">37</xref>]. However, our approach was distinct; we leveraged unsupervised machine learning techniques to computationally identify the main themes in our drug use tweet data set instead of manually analyzing tweets looking for mentions of specific drug names. In our prior analysis of this data set [<xref ref-type="bibr" rid="ref38">38</xref>], we used latent Dirichlet allocation (LDA) to generate topics based on “street-name” tweets (eg, colloquial terms) or “brand-name” tweets (eg, trademarked or generic terms) [<xref ref-type="bibr" rid="ref39">39</xref>]. Using that method, we found that tweets that fit into these 2 categories had different themes. Tweets that contained the brand or prescription name of a drug (eg, OxyContin, Vicodin, fentanyl, etc) had a higher likelihood of referring to the impact that drug use has in the context of US politics, political conversations, and society at large. This contrasts with tweets that referred to drugs via their street names (eg, Vikes, Oxys, etc), where individuals would, at times, openly and informally discuss their drug use.</p>
        <p>Furthermore, in contrast to tweets using street names, LDA more clearly categorized tweets containing brand names of drugs into specific drug categories, and as noted, many such tweets contained discussion of political events. Tweets containing street names were more difficult to classify using LDA for 2 reasons. First, street names for drugs could often refer to other words with different meanings and contexts, leading to 2 tweets that could contain the same term, for example, “vike,” but refer to different things entirely (eg, Vicodin or the Minnesota Vikings). Second, compared with the brand-name data set, people appeared to use informal terms to discuss drug use in unique or different nonpolitical contexts, leading to more topics being needed to accurately understand the corpus. We concluded from our previous study [<xref ref-type="bibr" rid="ref38">38</xref>] that unsupervised machine learning techniques could be leveraged to understand how the public perceives drug use on social media but that its utility for categorizing tweets using street names for drugs was lower than for tweets using brand-name drugs.</p>
        <p>As noted, our previous work used LDA, which relies on probability distributions and word co-occurrences to determine latent topics. To expand on this work, we leveraged a neural network approach to topic modeling called BERTopic [<xref ref-type="bibr" rid="ref40">40</xref>]. BERTopic relies on semantic word embeddings instead of word co-occurrence, so the algorithm can create coherent topics by understanding the context of each word from pretrained weights. In addition to using BERTopic to perform topic generation, we conducted sentiment analysis on the data that we had collected to understand the intensity and level of emotions associated with each tweet. As part of a larger discussion on digital surveillance of drug-related communication, we sought to expand our previous work by using a more advanced topic modeling tool, in addition to sentiment analysis, to add further context to the types of drug dialogues that may be occurring on the web and to find whether key differences are observed by the type of drug (ie, brand name vs street name). We used Valence Aware Dictionary and Sentiment Reasoner (VADER) scores [<xref ref-type="bibr" rid="ref41">41</xref>] to characterize the intensity of emotions of each tweet and determined the mean VADER scores for each topic. BERTopic, a newer topic modeling tool, is widely viewed as a more accurate topic generator than LDA. Using these methods, our research was guided by three specific research questions:</p>
        <list list-type="order">
          <list-item>
            <p>Using a neural network approach to topic modeling, what key semantic and thematic differences are observed in a corpus of tweets pertaining to a drug’s brand name versus street name?</p>
          </list-item>
          <list-item>
            <p>Using a lexicon-based sentiment analysis tool, what lexical differences in sentiment are observed in a corpus of tweets pertaining to a drug’s brand name compared with its street name?</p>
          </list-item>
          <list-item>
            <p>Using logistic regression, can we accurately predict the VADER-generated sentiment label of a tweet (ie, positive, negative, or neutral) from a tweet’s engagement metrics?</p>
          </list-item>
        </list>
        <p>Findings from this study stand to further refine our data by more clearly identifying content not pertaining to drug use or drug communication. The more refined corpus derived from such an approach, with reduced prevalence of extraneous content, can be further leveraged to construct a drug communication classifier that may better assist in analyzing larger, unstructured language data. Furthermore, by comparing results from LDA, a probabilistic approach to topic modeling, and Bidirectional Encoder Representations from Transformers (BERT), a neural network approach to topic modeling, our study stands to document the growing body of research supporting neural network topic modeling as the optimal choice for unsupervised NLP tasks. Importantly, findings from this study can also inform an additional pipeline to construct a classifier pertaining to drug communication on the web.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection</title>
        <p>Data for this study were collected from X, the social networking website formerly known as Twitter, between October and December 2022 before the discontinuation of its open-access API. To obtain the data relevant to this study, we leveraged the National Institute on Drug Abuse’s list of commonly abused prescription drugs to create the brand-name corpus. See <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> for a list of all queried drugs, parsed by brand and street names.</p>
        <boxed-text id="box1" position="float">
          <title>X (formerly Twitter) application programming interface queries by brand name and pseudonym (street name).</title>
          <p>
            <bold>Brand-name queries (n=31)</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Xanax, Percocet, Oxycontin, Vicodin, Fentanyl, Opana, Kadian, Avinza, Adderall, Ritalin, Ambien, Sonata, Lunesta, Valium, Librium, Halcion, Ativan, Amytal, Nembutal, Seconal, Roxanol, Duramorph, Actiq, Duragesic, Sublimaze, Tylox, Percodan, Biphetamine, Dexedrine, Concerta, MDMA</p>
            </list-item>
          </list>
          <p>
            <bold>Street-name queries (n=33)</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Hillbilly Heroin, Oxy, Oxy 80s, Rushbo, Blue Mollies, Black Mollies, Percs, Happy Pills, Barbs, Phennies, Tooies, Downers, Tranks, A-Minus, Zombie Pills, Skippy, The Smart Drug, Vitamin R, Benzos, Benzies, R-Ball, Crystal Meth, Pep Pills, Ludes, Hydros, Idiot Pills, Watson 387, Dexy, Dexies, Ampes, Super Jellies, Speed Pill, Uppers</p>
            </list-item>
          </list>
        </boxed-text>
        <p>Using this list as a reference point, we created bots to run strategic queries and Boolean phrases to collect tweets containing a reference to ≥1 prescription or street-drug names. As a comparative study, we triaged all tweets into one of the following two corpora: (1) a brand-name corpus that comprised all tweets with reference to prescription drugs, branded or technical names, and (2) a street-name corpus that comprised all tweets with reference to colloquial names for those drugs. Before cleaning, we had collected 362,216 (38.79%) tweets containing street-name references and 571,564 (61.21%) tweets that contained references to prescription brand-name drugs, totaling 933,780 tweets. After cleaning the data, which involved standardizing the text to identify and remove duplicates, the brand-name corpus contained 245,145 tweets and the street-name corpus contained 170,618 tweets, for a composite sample size of 415,763 (see Parker et al [<xref ref-type="bibr" rid="ref38">38</xref>] for further insights into the development of this corpus).</p>
      </sec>
      <sec>
        <title>Approaches</title>
        <sec>
          <title>Overview</title>
          <p>In this study, we combined a variety of NLP and machine learning tasks, including those pertaining to theme generation (neural network topic models), dimensionality reduction, and sentiment detection using VADER. We also used an informal qualitative review of our data and exploratory multinomial logistic regression. We explain each briefly below.</p>
        </sec>
        <sec>
          <title>Neural Network Topic Modeling</title>
          <p>Topic modeling refers to an NLP technique that uses a series of calculations to extract latent topics or themes from a collection of related documents or texts. We used a neural network topic modeling pipeline by generating topics using BERT vectors. BERT is a powerful, state-of-the-art transformer-based pretrained language model that can understand the context and meaning of words and sentences by comparing input data against a large-scale, pretrained data set. BERTopic is a topic modeling technique that uses BERT vectors to extract latent topics from corpora using one of many pretrained transformer models [<xref ref-type="bibr" rid="ref42">42</xref>]. BERT’s ability to generate high-quality word embeddings with clustering techniques produces coherent and semantically and contextually meaningful topics from a corpus of documents. Because the meaning of a word can change depending on the context, this is particularly useful for textual data analysis.</p>
        </sec>
        <sec>
          <title>Dimensionality Reduction</title>
          <p>Calculating BERT embeddings generated for corpora is computationally expensive and requires substantial computing power to run effectively. Therefore, dimensionality reduction, the process of transforming high-dimensional data into lower-dimensional data while retaining key elements of the data, is a key component of the topic extraction process. To accomplish this, we used 2 approaches: Uniform Manifold Approximation and Projection, a dimensionality reduction tool that can better detect the complex relationships between tweets on the basis of their language, and k-means clustering (k-means), a popular algorithm used for classification, clustering, and topic modeling, which was used as a clustering algorithm to perform topic modeling on BERT embeddings of the corpus data. The fundamental principle of k-means is to split a data set into k-clusters by defining k-centroid values in feature space. These centroids are initially randomly assigned and used to define the clusters. Through iterative assignment, the centroids are updated on the basis of how the data points are placed in the feature space. The choice of “k,” representing the number of clusters to consider, is a critical parameter that can be tuned to control the algorithm’s sensitivity to local variations in the data.</p>
          <p>To find the number of k-topics, we measured the coherence score of different topic configurations. A coherence score [<xref ref-type="bibr" rid="ref43">43</xref>] is derived from an iterative analysis to identify the optimal number of topics for a given corpus. Coherence scores are a way to evaluate the efficacy of topic models by measuring how well our topics represent the text corpora they are based on. A coherence score ranges from 0 to 1, and larger scores theoretically equate to more interpretable topics.</p>
        </sec>
        <sec>
          <title>Sentiment Analysis</title>
          <p>We used VADER [<xref ref-type="bibr" rid="ref41">41</xref>] to analyze and score the emotionality of our text. VADER is a rule-based tool for sentiment analysis that uses a specialized lexicon to capture both the polarity (positive, negative, and neutral) and the intensity of the sentiments expressed in a text. Unlike traditional sentiment analysis, VADER focuses on context-dependent emotional tones and accounts for nuanced sentiment expressions. This makes VADER particularly useful in deciphering sentiment in social media text, customer reviews, and informal communication, where conventional sentiment analysis techniques might fall short. VADER uses a lexicon of words and phrases, each of which is assigned a sentiment score based on their emotional connotations. Then, from the word order and sentence structure of a document, the intensity of the sentiment changes. For example, a phrase such as “Yay. Another phone interview” has a different sentiment score from “Yay! Another phone interview!” due to the extra exclamation marks, which would result in an increase in the intensity of the score. Sentiment scores in VADER range from –1 (very high negative valence) to +1 (very high positive valence). The sentiment score associated with a tweet is calculated by adding the individual sentiment valence scores from each word that corresponds to a word in the VADER lexicon and considering the punctuation and capitalization of a tweet to adjust the score accordingly. That value is then normalized from –1 to +1. We refer to this as the normalized, weighted VADER compound score (or compound score more generally). Using this number, we can measure the strength of the emotions associated with a tweet. After finding the sentiment compound score, we then classify the score into 3 labels: positive, negative, or neutral. 
A <italic>neutral sentiment</italic> is any sentiment where the score is between, but does not include, –0.05 and 0.05 [<xref ref-type="bibr" rid="ref44">44</xref>]. A <italic>positive sentiment</italic> is defined as any VADER score ≥0.05, while a <italic>negative sentiment</italic> is any score ≤–0.05. We then report the percentage of tweets that are positive, negative, or neutral in our corpus. Given our research questions, we extracted a compound VADER score (with a possible range of –0.99 to 0.99) and a label (positive, negative, or neutral) based on our cutoff criteria. Our use of VADER is strongly supported in computational health science research [<xref ref-type="bibr" rid="ref45">45</xref>-<xref ref-type="bibr" rid="ref48">48</xref>].</p>
        </sec>
        <sec>
          <title>Informal Manual Review</title>
          <p>After we extracted latent topics, we applied a sorting function in which tweets in our corpus were assigned to one of the k-corresponding topics on the basis of the presence of topic keywords. Once data in both corpora were sorted into topics, we briefly reviewed a select number of posts for each topic to add context to topic names and keywords. This process is standard for topic modeling analyses, as computers can only extract latent topics and cannot infer deeper meaning with unsupervised NLP methods.</p>
        </sec>
        <sec>
          <title>Sentiment Label Prediction</title>
          <p>All tweets in our study were collected with their engagement metrics, including likes, replies, and retweets. Previous research [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>] suggests that certain facets of language including affect (or sentiment), tone, and content are associated with highly positive or negative sentiment content, which in turn is associated with higher engagement on social media. While different engagement metrics (likes, retweets, and replies) are associated with different meanings for people [<xref ref-type="bibr" rid="ref51">51</xref>], individuals engage more with highly inflammatory content [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. However, there is some disagreement about whether positive or negative content is engaged with more frequently [<xref ref-type="bibr" rid="ref53">53</xref>]. Here, our objective was to determine whether we could predict the sentiment label of a tweet given its BERT-generated topic and the number of likes, retweets, and replies it has. The sentiment label of a tweet is +1, or 0, or –1, signifying a positive, neutral, or negative sentiment polarity for that tweet, respectively. From past research, we know that tweets with highly emotional language are retweeted more and generally receive more engagement [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]. We hypothesized that knowing the general content of a post (which is what the topic ID will tell us) and how engaged users are with a tweet would allow for accurate prediction of the sentiment label. To test that idea, for each corpus, we created a regression model to find whether labels can be predicted without needing the tweet text. These models contain the following covariates: engagement metrics (number of likes, retweets, and replies) and generated topic IDs. 
In addition, we compared this model with another model that used these variables and added the term-frequency–inverse document frequency (TF-IDF) vectorized clean-tweet text as a covariate to understand if word context was needed to accurately predict the sentiment label. TF-IDF vectorization [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>] is a method to convert the textual information of a document to a numerical representation where each word in the document is converted to a number representing how important that word is in the corpus. This makes it easier to compare how similar 2 documents are in the corpus. In our exploratory regression models, we used the top 5000 features from each corpus based on the generated TF-IDF scores. By comparing these 2 models, we determined the effect that the context of a tweet has on predicting the emotionality associated with the tweet.</p>
          <p>To predict the sentiment labels for each tweet, we used a multinomial multivariate logistic regression model. The purpose of this model was to classify tweets into one of the following three categories: positive (+1), negative (–1), or neutral (0) sentiment tweets. We implemented a classifier that used logistic regression to find the label for each tweet. Since we were interested in whether the label itself could be predicted using engagement metrics and the topic ID, we did not use any specific label type as a reference group and used the one-vs-rest heuristic method to classify labels. To evaluate the efficacy of our models, we used the <italic>F</italic><sub>1</sub>-score, precision, recall, and accuracy metrics to compare all models. The accuracy metric measures how often the predicted label from a model matches the true sentiment label, while the precision metric measures the proportion of the model’s positive predictions that are true positives (ie, true positives divided by the sum of true positives and false positives). The recall metric measures the proportion of true positives identified divided by the sum of true positives and false negatives, while the <italic>F</italic><sub>1</sub>-score can be defined as the harmonic mean of the recall and precision metrics. This score is the definitive measure of how well a model correctly predicts values since, unlike accuracy, it considers how often the model classifies outcomes as false positives and false negatives. We used the macroversion of the <italic>F</italic><sub>1</sub>-score, recall, and precision metrics to account for label imbalance. These metrics are standard for this type of modeling procedure [<xref ref-type="bibr" rid="ref57">57</xref>] (for more information on macrologistic regression with <italic>F</italic><sub>1</sub>-score, recall, and precision metrics, see Tarekegn et al [<xref ref-type="bibr" rid="ref58">58</xref>] and Manning et al [<xref ref-type="bibr" rid="ref59">59</xref>]). 
The <italic>sklearn</italic> package (scikit learn) was used to train and test the regression models, and VADER sentiment analysis tools were used from the VADER sentiment python package [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study data were collected using the formerly available Twitter API. All study data consisted of public “Tweets” on the Twitter or X platform. For the sake of this study, usernames and location data were not used for any part of the analysis. Collection and analyses of these data were designated by the Indiana University Institutional Review Board as Exempt (#18081).</p>
      </sec>
      <sec>
        <title>Procedure</title>
        <sec>
          <title>Data Collection</title>
          <p>Over 3 months, we continuously collected data via the (formerly) openly accessible X (Twitter) API using the search terms outlined in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>. For all brand-specific queries (eg, Adderall, Vicodin, Percocet, etc), we created a singular composite data set, hereafter referred to as the brand corpus (n=245,145), after initially collecting 571,564 brand-related tweets. For all colloquial, slang, or other similar mentions of a drug (eg, Addies, Vikes, Perks, etc), we created a second composite data set, hereafter referred to as the street corpus (n=170,618), after initially collecting 362,216 tweets.</p>
        </sec>
        <sec>
          <title>Data Cleaning for BERT and VADER Tasks</title>
          <p>After collecting tweets, we began processing the data ahead of the BERT, VADER, and regression analysis. For each data set, we first created a new column named “clean_text,” where we copied the nonpreprocessed text. From this new column, we then performed our cleaning operations using regular expressions. First, we removed any URLs, the mention symbol (@), emojis, numbers, punctuation, and special characters. Then, we removed any white space present in each tweet to create consistently spaced text. Next, we removed any unnecessary parts of speech using a lemmatizer in addition to stop words, which typically obfuscate the clarity of topic models. For the BERT analysis, we compositely analyzed the text that was entirely preprocessed, in line with standard topic modeling applications. For the VADER analysis, we analyzed the unprocessed text, in accordance with conventional VADER applications, to ensure that the context (including punctuation, adverbs, and adjectives) was considered in the final sentiment score.</p>
        </sec>
        <sec>
          <title>Coherence Score Calculations</title>
          <p>Once the data were preprocessed, we performed iterative topic models with coherence score calculation to identify optimal model fit, beginning with baseline recommendations outlined by Parker et al [<xref ref-type="bibr" rid="ref38">38</xref>]. To perform an iterative BERT analysis, we tested a range of topic model solutions ranging from 10 to 60 topics, iterating by increments of 10 (eg, <italic>k</italic>=10, 20, 30...60 topics). For the brand-name corpus, we found that a smaller number of topics &lt;10 would be needed to find the optimal coherence score. As such, we tested a range of topics from 5 to 20 in increments of 5 (ie, <italic>k</italic>=5, 10, 15, 20). After each iteration, we calculated a coherence score, which infers the degree to which a human can intuitively understand what a computer-generated topic represents. Higher coherence scores denote greater clarity; lower coherence scores denote lesser clarity. After running all iterations, we identified a different topic solution per corpus. We identified 5 topics (brand-name coherence=0.699) and 40 topics (street-name coherence=0.600) as the optimal topic fit for our data sets. Once we identified the optimal topic solution for the brand and street corpora, we then created a sorting function that triaged all data points into one of the k-respective topics based on keyword matching. After sorting the data, we performed an informal qualitative review to identify the primary topic themes, which were retrospectively named.</p>
        </sec>
        <sec>
          <title>VADER Analysis</title>
          <p>We ran the nonprocessed text through the VADER lexicon. For each entry, we calculated the normalized compound sentiment for each tweet. Then, we labeled tweets as having positive, neutral, or negative sentiments if the compound score for sentiment was ≥0.05, between but not inclusive of –0.05 and 0.05, and ≤–0.05, respectively, for each label. This threshold value for sentiment is a common standard when using normalized VADER scores [<xref ref-type="bibr" rid="ref41">41</xref>]. We reported the mean and SD of the compound sentiment score for both corpora. After labeling tweets as positive, negative, and neutral, we counted the number of tweets that contained each label and compared the percentage of positive, negative, and neutral tweets between corpora.</p>
        </sec>
        <sec>
          <title>Regression Analysis</title>
          <p>For the regression analysis, we used the sentiment labels from our VADER analysis, converting the labels from positive, neutral, and negative to +1, 0, and –1. The data set was split (80:20 ratio) for training and testing, respectively. First, we used logistic regression to predict sentiment labels based on the tweet’s topic ID and specific engagement metrics (ie, likes, replies, or retweets). This was conducted separately for each engagement metric; combining them necessitated establishing a method to appropriately weigh the different engagement metrics, since each engagement behavior implies a different degree of “engagement” (eg, “liking” a tweet takes less effort than writing a reply). Next, we applied a multiclass logistic regression to predict sentiment labels, incorporating the topic ID, engagement metrics, and top 5000 features based on their TF-IDF vectorization. Finally, we applied the Limited-memory Broyden-Fletcher-Goldfarb-Shanno optimizer to optimize the weights in our model. We reported the macroaggregated precision [<xref ref-type="bibr" rid="ref59">59</xref>], recall, accuracy, and <italic>F</italic><sub>1</sub>-score metrics among the multinomial models [<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref61">61</xref>]. This specific type of aggregation was performed since the distribution of sentiment labels was imbalanced.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Research Question 1: Using a Neural Network Approach to Topic Modeling, What Key Semantic and Thematic Differences Are Observed in a Corpus of Tweets Pertaining to a Drug’s Brand Name Versus a Street Name?</title>
        <sec>
          <title>Overview</title>
          <p>Our neural network topic modeling pipeline identified several noteworthy differences in the brand and street-name corpora. This includes optimal topic size in either corpus, scope of the topics, and relative clarity in the final models. <xref ref-type="table" rid="table1">Table 1</xref> provides information about the 5 topics in the brand-name corpus (the optimal number of topics based on the coherence score measurement). In <xref ref-type="table" rid="table2">Table 2</xref>, we report on the themes of each cluster as reported by BERTopic. We contrast this with the findings in <xref ref-type="table" rid="table3">Table 3</xref>, where we searched for 40 topics in the street-name data set. We describe the top 10 words in each topic in <xref ref-type="table" rid="table3">Table 3</xref>; then, we summarize the meaning of the groups in <xref ref-type="table" rid="table4">Table 4</xref>. The groups were determined qualitatively in <xref ref-type="table" rid="table4">Table 4</xref> by cross-referencing <xref rid="figure1" ref-type="fig">Figure 1</xref>, based on which topics were overlapping.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Brand-name topic ID information, including key terms, count, and percentage of topic ID (n=245,145).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="110"/>
              <col width="720"/>
              <col width="170"/>
              <thead>
                <tr valign="top">
                  <td>Topic ID</td>
                  <td>Top 10 search terms per topic</td>
                  <td>Document count, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>0</td>
                  <td>Adderall, Ritalin, ADHD<sup>a</sup>, amphetamine, stimulant, medication, prescription, drug, prescribed, meth</td>
                  <td>76,798 (31.33)</td>
                </tr>
                <tr valign="top">
                  <td>1</td>
                  <td>fentanyl, cartel, Biden, heroin, illegals, crisis, drug, trafficking, Bidens, epidemic</td>
                  <td>59,382 (24.22)</td>
                </tr>
                <tr valign="top">
                  <td>2</td>
                  <td>Psychedelics, LSD<sup>b</sup>, shrooms, psychedelic, drug, ecstasy, weed, pill, cocaine, ketamine</td>
                  <td>40,001 (16.32)</td>
                </tr>
                <tr valign="top">
                  <td>3</td>
                  <td>Xanax, anxiety, Vicodin, drug, pill, prescribed, calm, bar, addicted, panic</td>
                  <td>37,048 (15.11)</td>
                </tr>
                <tr valign="top">
                  <td>4</td>
                  <td>Sonata, Beethoven, piano, symphony, Mozart, composer, concerto, allegro, Chopin, moonlight</td>
                  <td>31,916 (13.02)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>ADHD: attention-deficit/hyperactivity disorder.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>LSD: lysergic acid diethylamide.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Brand-name group information, including key terms, count, and percentage. The qualitative themes were generated based on the top 10 terms seen in <xref ref-type="table" rid="table1">Table 1</xref>. Since only 5 topics were found from the BERTopic model, the topics and groups were able to be matched with each other easily (n=245,145).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="130"/>
              <col width="430"/>
              <col width="270"/>
              <col width="170"/>
              <thead>
                <tr valign="top">
                  <td>Group ID</td>
                  <td>Overarching themes</td>
                  <td>Topic IDs in group (topics forming groups in <xref rid="figure2" ref-type="fig">Figure 2</xref>)</td>
                  <td>Document count, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>A</td>
                  <td>Adderall, Ritalin, ADHD<sup>a</sup>, stimulant use</td>
                  <td>0</td>
                  <td>76,798 (31.33)</td>
                </tr>
                <tr valign="top">
                  <td>B</td>
                  <td>music, concerts, posts unrelated to drug use</td>
                  <td>4</td>
                  <td>31,916 (13.02)</td>
                </tr>
                <tr valign="top">
                  <td>C</td>
                  <td>psychedelics, LSD<sup>b</sup>, hallucinogens</td>
                  <td>2</td>
                  <td>40,001 (16.32)</td>
                </tr>
                <tr valign="top">
                  <td>D</td>
                  <td>Xanax, anxiety, depressants</td>
                  <td>3</td>
                  <td>37,048 (15.11)</td>
                </tr>
                <tr valign="top">
                  <td>E</td>
                  <td>fentanyl, overdose, US politics</td>
                  <td>1</td>
                  <td>59,382 (24.22)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>ADHD: attention-deficit/hyperactivity disorder.</p>
              </fn>
              <fn id="table2fn2">
                <p><sup>b</sup>LSD: lysergic acid diethylamide.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Individual street topic information, including key terms, count, and percentage (n=170,618).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="100"/>
              <col width="710"/>
              <col width="190"/>
              <thead>
                <tr valign="top">
                  <td>Topic ID</td>
                  <td>Top 10 search terms per topic</td>
                  <td>Document count, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>0</td>
                  <td>skippy, skippys, skipp, skip, damned, damn, love, darn, f*ck, hell</td>
                  <td>10,703 (6.27)</td>
                </tr>
                <tr valign="top">
                  <td>1</td>
                  <td>barb, barbz, barbed, barbarian, beyhive, fav, time, lmao, stardust, bg</td>
                  <td>10,339 (6.06)</td>
                </tr>
                <tr valign="top">
                  <td>2</td>
                  <td>percs, perc, perk, perky, leave, n***a, shit, im, bruh, bro</td>
                  <td>9634 (5.65)</td>
                </tr>
                <tr valign="top">
                  <td>3</td>
                  <td>playoff, qbs, fumble, nfl, dallas, 49ers, afc, touchdown, offense, qb</td>
                  <td>8916 (5.23)</td>
                </tr>
                <tr valign="top">
                  <td>4</td>
                  <td>pill, happiness, smiling, antidepressant, mood, happy, joy, depression, smile, happiest</td>
                  <td>8480 (4.97)</td>
                </tr>
                <tr valign="top">
                  <td>5</td>
                  <td>meth, crystal, methamphetamine, crystalmeth, drug, cocaine, heroin, coke, methclouds, addict</td>
                  <td>8432 (4.94)</td>
                </tr>
                <tr valign="top">
                  <td>6</td>
                  <td>vikes, vikesbills, losing, winning, playoff, game, win, loss, lose, beat</td>
                  <td>7986 (4.68)</td>
                </tr>
                <tr valign="top">
                  <td>7</td>
                  <td>benzodiazepine, benzos, benzo, xanax, antidepressant, prescribing, antipsychotic, medication, ssri, anxiety</td>
                  <td>7754 (4.54)</td>
                </tr>
                <tr valign="top">
                  <td>8</td>
                  <td>barb, barbz, nicki, minaj, rapper, rap, lil, nickis, gang, grammy</td>
                  <td>7313 (4.29)</td>
                </tr>
                <tr valign="top">
                  <td>9</td>
                  <td>skippy, skippys, taxpayer, tory, cpc, government, trickle, labour, politician, govt</td>
                  <td>7200 (4.22)</td>
                </tr>
                <tr valign="top">
                  <td>10</td>
                  <td>cannabis, marijuana, weed, drug, heroin, psychedelics, shrooms, morphine, cocaine, lsd</td>
                  <td>6926 (4.06)</td>
                </tr>
                <tr valign="top">
                  <td>11</td>
                  <td>crackheads, perc, lean, crack, crackhead, shrooms, percs, coke, drug, weed</td>
                  <td>5611 (3.29)</td>
                </tr>
                <tr valign="top">
                  <td>12</td>
                  <td>upper, lower, higher, high, knockeruppers, taking, pickeruppers, like, hand, took</td>
                  <td>5401 (3.17)</td>
                </tr>
                <tr valign="top">
                  <td>13</td>
                  <td>percs, perc, pop, n***a, poppin, bitch, popping, lil, yo, dat</td>
                  <td>5112 (3)</td>
                </tr>
                <tr valign="top">
                  <td>14</td>
                  <td>skippy, skippys, fact, pathetic, racist, ignorance, hate, claim, troll, false</td>
                  <td>5024 (2.94)</td>
                </tr>
                <tr valign="top">
                  <td>15</td>
                  <td>torch, welder, welding, wgas, oxys, weld, profitable, ox, oxy, kit</td>
                  <td>4965 (2.91)</td>
                </tr>
                <tr valign="top">
                  <td>16</td>
                  <td>trading, stockmarket, market, stock, profit, investing, earnings, investment, marketbreadth, sector</td>
                  <td>4786 (2.81)</td>
                </tr>
                <tr valign="top">
                  <td>17</td>
                  <td>murdered, victim, peadophiles, murder, 911, twitter, social, room, downer, dont</td>
                  <td>4690 (2.75)</td>
                </tr>
                <tr valign="top">
                  <td>18</td>
                  <td>song, release, album, music, 2019, muddy, toe, tpne, weekend, forever</td>
                  <td>4431 (2.6)</td>
                </tr>
                <tr valign="top">
                  <td>19</td>
                  <td>janet, dorothy, barb, betty, robert, love, kitty, miss, rachel, dearest</td>
                  <td>3865 (2.27)</td>
                </tr>
                <tr valign="top">
                  <td>20</td>
                  <td>yellow, referee, ref, penalty, foul, match, fifa, fifaworldcup, england, worldcup</td>
                  <td>3813 (2.23)</td>
                </tr>
                <tr valign="top">
                  <td>21</td>
                  <td>eileen, dexy, dexys, dex, dexies, dexter, dexytools, dexy_buys, dexy_updates, dextools</td>
                  <td>3805 (2.23)</td>
                </tr>
                <tr valign="top">
                  <td>22</td>
                  <td>skol, vikes, vikesbites, skolvikes, gopher, game, team, win, hock, winning</td>
                  <td>3772 (2.21)</td>
                </tr>
                <tr valign="top">
                  <td>23</td>
                  <td>house, budget, buying, buy, home, fixerupper, fixer, buyer, #shopmycloset, renovation</td>
                  <td>3592 (2.11)</td>
                </tr>
                <tr valign="top">
                  <td>24</td>
                  <td>nsfwtwitte, leakedvideos, nsfwtwt, leakedvideo, nsfwtw, nsfwvid, nsfw, discord, skippyleaks, chastitylifestyle</td>
                  <td>3331 (1.95)</td>
                </tr>
                <tr valign="top">
                  <td>25</td>
                  <td>grove, downersgrove, hiring, retailjobs, suburb, downtown, downer, st, naperville, chicago</td>
                  <td>2604 (1.53)</td>
                </tr>
                <tr valign="top">
                  <td>26</td>
                  <td>debbiedowners, debbie, downer, debby, nancy, gue, karen, dah, boebert, owl</td>
                  <td>2488 (1.46)</td>
                </tr>
                <tr valign="top">
                  <td>27</td>
                  <td>peanut, butter, skippy, snack, jelly, jiffy, reeses, chocolate, nuttin, sandwich</td>
                  <td>1969 (1.15)</td>
                </tr>
                <tr valign="top">
                  <td>28</td>
                  <td>gain, daily, gme, reduce, wmt, totalday, mixed, sqqq, amp, pt</td>
                  <td>1874 (1.1)</td>
                </tr>
                <tr valign="top">
                  <td>29</td>
                  <td>central, basketball, varsity, halftime, chicago, tonight, livestream, tournament, illinois, east</td>
                  <td>1487 (0.87)</td>
                </tr>
                <tr valign="top">
                  <td>30</td>
                  <td>56mmuppers, rifle, firearm, ar15, 9mmuppers, 62x39uppers, blackoutuppers, receiver, armed, barrel</td>
                  <td>1418 (0.83)</td>
                </tr>
                <tr valign="top">
                  <td>31</td>
                  <td>rushbo, rushbos, rushie, rush, el, miss, limbaugh, limbaughs, bo, linda</td>
                  <td>619 (0.36)</td>
                </tr>
                <tr valign="top">
                  <td>32</td>
                  <td>jordanpeterson, peterson, jordan, shooter, manson, follower, serotonin, twitter, walmart, fan</td>
                  <td>520 (0.3)</td>
                </tr>
                <tr valign="top">
                  <td>33</td>
                  <td>spy, trader, chatroom, gden, roku, ccl, gmbl, rgr, rcl, wfc</td>
                  <td>410 (0.24)</td>
                </tr>
                <tr valign="top">
                  <td>34</td>
                  <td>pigeon, meth, prison, detained, correctional, backpack, carrying, caught, arrested, smuggle</td>
                  <td>390 (0.23)</td>
                </tr>
                <tr valign="top">
                  <td>35</td>
                  <td>blackoutuppers, grape, blackout, upper, receiver, stainless, 316, 300, defense, tactical</td>
                  <td>377 (0.22)</td>
                </tr>
                <tr valign="top">
                  <td>36</td>
                  <td>volume, callput, xle, plug, overview, 192, ratio, energy, xrxoxy101, total</td>
                  <td>167 (0.1)</td>
                </tr>
                <tr valign="top">
                  <td>37</td>
                  <td>oxy_usdt, wrx_usdt, oxy_usdtsuggested, aln_usdt, xyo_usdt, wncg_usdt, xprt_usdt, usdt, lamb_usdt, aioz_usdt</td>
                  <td>166 (0.1)</td>
                </tr>
                <tr valign="top">
                  <td>38</td>
                  <td>meth, jordanpeterson, peterson, serotonin, manson, stimulant, shooter, follower, jordan, cybermen</td>
                  <td>165 (0.1)</td>
                </tr>
                <tr valign="top">
                  <td>39</td>
                  <td>stock, group, chatroom, trade, trxc, amd, astx, mgm, gmbl, amzn</td>
                  <td>83 (0.05)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Street name grouped topics including overarching themes, document count, and percentage. The qualitative themes were generated based on the top 10 terms seen in <xref ref-type="table" rid="table3">Table 3</xref>. From the 40 topics, 8 groups were found from the overlapping topics seen in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Each of the 8 groups has a unique theme associated with it, with differing numbers of topics per theme (n=170,618).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="80"/>
              <col width="520"/>
              <col width="230"/>
              <col width="170"/>
              <thead>
                <tr valign="top">
                  <td>Group ID</td>
                  <td>Overarching themes</td>
                  <td>Topic IDs in group (topics that form groups in <xref rid="figure1" ref-type="fig">Figure 1</xref>)</td>
                  <td>Document count, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>F</td>
                  <td>Group cluster pertaining to sports-related topics and themes</td>
                  <td>3, 6, 20, 22, 29</td>
                  <td>25,974 (15.22)</td>
                </tr>
                <tr valign="top">
                  <td>G</td>
                  <td>Group cluster pertaining to pop culture fandoms (eg, the Barbz, a Nicki Minaj fanbase)</td>
                  <td>1, 8, 19, 31</td>
                  <td>22,136 (12.97)</td>
                </tr>
                <tr valign="top">
                  <td>H</td>
                  <td>Grouped cluster pertaining to firearm dialogue and online sales</td>
                  <td>30, 35</td>
                  <td>1795 (1.05)</td>
                </tr>
                <tr valign="top">
                  <td>I</td>
                  <td>Grouped cluster pertaining to stock exchanges (eg, Oxy)</td>
                  <td>15, 16, 32, 36, 37, 39</td>
                  <td>10,687 (6.26)</td>
                </tr>
                <tr valign="top">
                  <td>J</td>
                  <td>Grouped clusters pertaining to Percocet use and access</td>
                  <td>2, 11, 13, 33</td>
                  <td>20,767 (12.17)</td>
                </tr>
                <tr valign="top">
                  <td>K</td>
                  <td>Group of clusters comprising unclear, uncertain topics</td>
                  <td>12, 17, 18, 21, 23, 24, 25, 26</td>
                  <td>30,342 (17.78)</td>
                </tr>
                <tr valign="top">
                  <td>L</td>
                  <td>Grouped clusters pertaining to “Skippy” as a peanut butter brand, drug, and political figure</td>
                  <td>0, 9, 14, 27</td>
                  <td>24,896 (14.59)</td>
                </tr>
                <tr valign="top">
                  <td>M</td>
                  <td>Grouped cluster pertaining to assorted drug use, including meth, crack-cocaine, and others.</td>
                  <td>4, 5, 7, 10, 28, 34, 38</td>
                  <td>34,021 (19.94)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Street corpus intertopic distance map denoting topic overlap. The topics were generated from our BERTopic model, and the themes were decided from qualitative analysis of the posts within each topic. From our iterative BERTopic analysis, the number of topics with the highest coherence of 0.600 was 40.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e57885_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Brand Corpus</title>
          <p>Our iterative BERTopic analysis yielded a 5-topic solution (coherence=0.699). <xref rid="figure2" ref-type="fig">Figure 2</xref> provides a visualization of our data using an intertopic distance map. This map allows us to infer the relative similarity (or high correlation) and dissimilarity (or low correlation) of each topic relative to one another. From <xref rid="figure2" ref-type="fig">Figure 2</xref>, we can infer 5 mutually distinct topics, which is evidenced by the absence of overlap between clusters. When reviewing each cluster’s keywords, we further inferred that each topic pertained to an overarching drug class. Group A principally referred to stimulant use; group B referred to music, concerts, or tweets otherwise not pertaining to drug use; group C referred to psychedelics and hallucinogens; group D referred to depressants; and group E referred to fentanyl use and overdose.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Brand corpus intertopic distance map denoting topic overlap. The topics were generated from our BERTopic model, and the themes were decided from the qualitative analysis of the posts within each topic. From our iterative BERTopic analysis, the number of topics with the highest coherence of 0.699 was 5. ADHD: attention-deficit/hyperactivity disorder; LSD: lysergic acid diethylamide.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e57885_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p><xref ref-type="table" rid="table1">Table 1</xref> offers further context regarding the distribution of topics, while <xref ref-type="table" rid="table2">Table 2</xref> shows the relevant groupings and themes based on the topics in <xref ref-type="table" rid="table1">Table 1</xref> and the clustering shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. We note that the group ID in <xref ref-type="table" rid="table1">Table 1</xref> corresponds to the clusters labeled in <xref rid="figure2" ref-type="fig">Figure 2</xref>. The 2 groups with the greatest prominence were group A (76,798/245,145, 31.33%; <italic>Adderall</italic>, <italic>Ritalin</italic>, <italic>ADHD</italic>, and <italic>stimulant use</italic>) and group E (59,382/245,145, 24.22%; <italic>fentanyl</italic>, <italic>overdose</italic>, <italic>US politics</italic>), comprising &gt;55% of the brand-name corpus. Regarding stimulant use, or group A, we observed a variety of different subthemes, including recreational use (tweet: “being on Adderall is so fun bc i just spent 30 minutes watching tik toks of snoopy dancing to different songs”) and as a current events topic (tweet: “@JoeBiden what is your plan to fix the adderall shortage?”). The second most prominent theme, fentanyl, or group E, was largely centered on discussing the drug in a strongly political and current events context, often spanning overdose rates and the impact of immigration on fentanyl availability (tweet: “They were killed by people with guns. BTW, you also forgot 108,000 people killed by open borders fentanyl in the last year<italic>.</italic>”). Notably, we did not observe much discussion about the recreational use of fentanyl in our data. Groups C (psychedelics) and D (depressants) largely covered recreational uses of these drugs. 
However, we did observe a body of tweets advertising the sale of hallucinogenic products in states where their use is ostensibly legal (tweet: “I love microdosing and I gladly recommend [redacted] on Instagram they got shrooms LSD dmt MDMA fast shipping and delivery<italic>”</italic>). We classified 13.02% (31,916/245,145) of our data into group B, which we qualitatively deemed to contain posts not specific to drug use. Recurring mentions in group B included music, concerts, and car brands (tweet: “The suspect fled the scene in a white, four-door, Hyundai Sonata with an obscured North Carolina temporary tag, according to police”; tweet: “Nice piece, devils trill sonata is a good choice <inline-graphic xlink:href="jmir_v26i1e57885_fig3.png" xlink:type="simple" mimetype="image"/>.”), which may be explained by the name, Sonata, and its various associations.</p>
        </sec>
        <sec>
          <title>Street Corpus</title>
          <p>Our iterative BERTopic analysis yielded a 40-topic solution for the street corpus (coherence=0.600). <xref rid="figure1" ref-type="fig">Figure 1</xref> visualizes our topics using an intertopic distance map where the overlap denotes high topic correlation, and sparsity indicates low topic correlation. Unlike the brand corpus, which contained 5 nonoverlapping topics that could be easily generalized into specific themes, the 40 topics associated with the street corpus had various degrees of overlap, which indicates highly similar, or correlated, topics. When reviewing the keywords associated with each of the 40 topics and associated distributions (<xref ref-type="table" rid="table3">Table 3</xref>), we categorized our data further along 8 overarching themes as further illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>. More specifically, clusters associated with group F were largely about sports, group G about pop culture fandoms, group H about firearms, group I about the stock exchange, and group J about Percocet, while group K contained unclear focus, group L contained a variety of tweets about “skippy” in various contexts, and group M contained posts about assorted drug use.</p>
          <p><xref ref-type="table" rid="table4">Table 4</xref> offers further context regarding general topic distribution and group clustering. There were fewer topics pertaining exclusively to drugs and drug use in the street corpus. In place of such drug-related conversations, we instead observed a disjointed collection of topics that were either not clear (group K: 30,342/170,618, 17.78% representation) or more succinctly focused on non–drug-related topics including sports (group F: 25,974/170,618, 15.22% representation), pop culture fanbases (group G: 22,136/170,618, 12.97% representation), firearm dialogues and sales (group H: 1795/170,618, 1.05% representation), stock prices and sales (eg, OXY; group I: 10,687/170,618, 6.26% representation), and myriad uses for the term “skippy” (group L: 24,896/170,618, 14.59% representation). Importantly, these non–drug-related topics all contained the appropriate query name, yet the foci of the tweets were decisively not drug related. For example, tweets regarding sports referenced the Minnesota Vikings using their common nickname, “the vikes” (tweet: “Ya, the unknown clock. The vikes would get screwed on that one. I promise you that”). For fandom, we observed a substantive body of tweets about Nicki Minaj’s fanbase, commonly referred to as “the barbz” (tweet: “Barbs weird always wanting Nicki to be friends with people who don’t like her”). Barbs, or barbz, also refers to a common street name for barbiturates. For stock prices, tweets referenced Occidental Petroleum Corporation, listed on the US Stock Exchange, as “OXY” (tweet: “I’m also very bullish on $OXY stock”). Skippy often referenced a peanut butter brand (tweet: “id honestly put skippy peanut butter in my top five favorite foods”) and also referenced Canadian politician Pierre Poilievre, leader of the Conservative Party of Canada [<xref ref-type="bibr" rid="ref62">62</xref>] (tweet: “Yet another one that Skippy, nor the Conservatives have a solution to address. 
Just like when they voted against dental care for children.”). However, despite the noise inherent to these conflated topics, we also observed numerous instances in which a tweet referenced a particular query and was, in fact, drug related.</p>
          <p>After an informal qualitative review, we determined that approximately 32% of posts (groups J and M) pertained directly to drug use. In contexts where a post was about a specific kind of drug use, we observed more direct statements about recreational use. We also determined groups J and M largely, and nearly exclusively, referred to drug use in a recreational and often light-hearted context (tweet: “Honestly, most of the prosecutors I know were also coked out—it’s refreshing to see a cop who loves downers so much”; tweet: “Ohh yeah ladies, I forgot to mention they had me on downers and I smoked pot<italic>.</italic>”).</p>
        </sec>
        <sec>
          <title>Contrast Between Corpora</title>
          <p>We observed both obvious and nuanced differences between corpora. First, the BERT-identified optimal number of topics differed between the brand corpus and the street corpus, which may reflect the relative consistency of brand-related content and the broad diversity of the street-related content. Indeed, in the brand corpus, we observed consistent discussions of a drug in a recreational context. However, we also consistently observed how certain drugs, including fentanyl and Adderall, were often discussed in a current events context (ie, the nationwide Adderall shortage) or in a sociopolitical context (ie, immigration and its effects on fentanyl distribution along the southern border). These more formal pockets of conversation were almost entirely lacking in the street corpus where only a small portion of the tweets explicitly mentioned drug use; nevertheless, we acknowledge that a full review of each tweet was not undertaken. When it was apparent that a tweet contained an appropriate query but no mention of a drug, we observed the content pertaining to the term’s other potential applications or uses. Unique to the street corpus seemed to be more positive mentions of a given drug, typically in a recreational use context or as a light-hearted exchange. Many tweets in the street corpus also had limited context, making it difficult for a computer or members of the study team to appropriately categorize (tweet: “OMG. I love the barbz so much”; tweet: “Gotta love my Vikes”). Thus, despite leveraging a more refined algorithm to conduct a topic modeling analysis (in contrast to our prior use of LDA), there was still an inherent messiness to these data that requires further refinement and consideration.</p>
        </sec>
      </sec>
      <sec>
        <title>Research Question 2: Using a Lexicon-Based Sentiment Analysis Tool, What Lexical Differences in Sentiment Are Observed in a Corpus of Tweets Pertaining to a Drug’s Brand Name Compared With its Street Name?</title>
        <p>In addition to content differences in the brand and street-name BERTopic analysis, we also identified affective similarities and differences using VADER, a lexicon-based sentiment analysis tool. For this analysis, we extracted the compound VADER score per tweet, which ranged from –0.99 to 0.99, and emphasized a tweet’s valence intensity. We also extracted the sentiment label (positive, negative, or neutral) based on our cutoff criteria. We observed key differences by score and label. First, the mean VADER compound score for the brand corpus was between –0.05 and 0.05, showing that the mean sentiment was neutral, while the mean compound score for the street corpus was &gt;0.05, indicating an inclination toward positive sentiment in the street corpus (mean brand compound score –0.0082, SD 0.477; mean street compound score 0.11, SD 0.478). However, the SD for the compound scores was large in both corpora (approximately 0.48), and this suggests that we cannot broadly generalize the sentiment in the street or brand corpus as being predominantly positive, negative, or neutral.</p>
        <p><xref ref-type="table" rid="table5">Table 5</xref> shows the percentage of tweets in each corpus that fit within a specific sentiment label. These percentages do not account for the magnitude of a tweet’s sentiment score. Tweets that were only slightly positive (eg, 0.051) were labeled as having positive sentiment and were categorized alongside extremely positive tweets (eg, 0.80). In other words, our findings indicate both the overall magnitude of sentiment across all tweets in a corpus (the aforementioned mean compound scores) as well as the prevalence of tweets classified with each sentiment label according to our established cutoff scores (<xref ref-type="table" rid="table5">Table 5</xref>).</p>
        <p>One explanation for lower average VADER scores in the brand corpus may be the political nature of a substantive body of these tweets. For example, tweets about fentanyl often emphasized overdose, border security, and other similarly tense political dynamics, which were largely absent from the street corpus. This distinction may also explain the greater presence of tweets tagged with a “positive” VADER value in the street corpus (77,543/170,618, 45.45%) versus the brand corpus (88,826/245,145, 36.23%). Other insights gleaned from VADER include a smaller number of tweets tagged as negative in the street corpus compared with the brand corpus (street: 47,603/170,618, 27.9%; brand: 86,586/245,145, 35.32%). Both corpora contained similar amounts of posts with a neutral sentiment.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Sentiment percentages for brand and street corpus on the basis of computer-assigned sentiment labels (positive, negative, or neutral).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="280"/>
            <col width="290"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Corpus</td>
                <td>Positive sentiment total (%)</td>
                <td>Negative sentiment total (%)</td>
                <td>Neutral sentiment total (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Brand name</td>
                <td>36.2</td>
                <td>35.3</td>
                <td>28.5</td>
              </tr>
              <tr valign="top">
                <td>Street name</td>
                <td>45.4</td>
                <td>27.9</td>
                <td>26.7</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Research Question 3: Can We Accurately Predict the VADER-Generated Sentiment Label of a Tweet (ie, Positive, Negative, Neutral) From a Tweet’s Engagement Metric?</title>
        <p>When performing logistic regression with the brand-name corpus, the model excluding the text of a tweet as part of the features was 38.5% accurate on average across all engagement metrics compared with the model including tweet text as a feature where the model was 82.8% accurate on average. Similarly, in the street-name corpus, the average accuracy of the model excluding tweet text was 46.7%, while that of the model including tweet text was 85.4%. In both corpora, the models that included the text of a tweet as a feature when performing TF-IDF vectorization were more accurate by around 40%. We noted negligible differences in accuracy when comparing the likes, retweets, and replies models to each other within each corpus. The macro <italic>F</italic><sub>1</sub>-scores were even more different between the models that did not use the vectorized text (brand=0.231; street=0.214) compared with the models that did use the vectorized text (brand=0.828; street=0.847). Summary statistics are shown in <xref ref-type="table" rid="table6">Table 6</xref>.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Summary statistics for regression models<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="210"/>
            <col width="0"/>
            <col width="190"/>
            <col width="0"/>
            <col width="190"/>
            <col width="0"/>
            <col width="190"/>
            <col width="0"/>
            <col width="190"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Corpus</td>
                <td colspan="2">Accuracy (%)</td>
                <td colspan="2">Precision (%)</td>
                <td colspan="2">Recall (%)</td>
                <td><italic>F</italic><sub>1</sub>-score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="10">
                  <bold>Brand name</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Without text</td>
                <td colspan="2">38.5</td>
                <td colspan="2">58.3</td>
                <td colspan="2">33.5</td>
                <td colspan="2">23.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>With text</td>
                <td colspan="2">82.8</td>
                <td colspan="2">82.8</td>
                <td colspan="2">83</td>
                <td colspan="2">82.8</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Street name</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Without text</td>
                <td colspan="2">46.7</td>
                <td colspan="2">35</td>
                <td colspan="2">33.4</td>
                <td colspan="2">21.4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>With text</td>
                <td colspan="2">85.4</td>
                <td colspan="2">84.8</td>
                <td colspan="2">84.6</td>
                <td colspan="2">84.7</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>For both the street- and brand-name corpus, we find that adding the text of the tweet as a feature to our regression model greatly improved the accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score compared with the model that did not incorporate this feature.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <sec>
          <title>Overview</title>
          <p>This study used a neural network approach to topic modeling (BERTopic) to examine 2 contemporaneous corpora of tweets selected for brand and street-name drug references. Interestingly, differences in the interpretability between the corpora that we first observed with LDA [<xref ref-type="bibr" rid="ref38">38</xref>] remained salient with this more advanced approach. Then, using VADER, we identified that the street-name corpus has a larger inclination toward positive sentiment, while the brand-name corpus contains similar amounts of tweets labeled positive and negative. Finally, we combined the results from the topic model and sentiment analysis to create predictive models (logistic regression) to estimate sentiment labels from the topic ID and engagement metrics and compared the accuracy of the models that included the vectorized tweet text as a covariate and the models that did not.</p>
        </sec>
        <sec>
          <title>Topic Analysis</title>
          <p>BERTopic, in combination with Uniform Manifold Approximation and Projection and k-means clustering, yielded statistically coherent clustering of topics, although the outputs for the street-name corpus were more difficult to interpret and generalize. The tweets in the brand-name corpus discussed different drugs in the context of their intended uses, as well as how certain drugs were perceived to relate to ongoing political or social issues. The brand-name data set could be reduced to 5 major themes: broad discussion about fentanyl use and its discussion in a sociopolitical context; stimulant use (eg, Adderall, Ritalin, etc); discussion about music and car models related to the word “sonata”; psychedelic use; and discussion about anxiety-related medication (Xanax). The discourse about fentanyl was especially varied, with many topics containing posts relating to politics, immigration, border security, and, in some cases, actual use. This differed from how people discussed Adderall; in our data, people were concerned about the 2022 Adderall shortage [<xref ref-type="bibr" rid="ref63">63</xref>] and were interested in how to use the drug safely. As we indicated in the <italic>Results</italic> section, Sonata, the brand name of a sleep aid, tended to capture tweets about music and the Hyundai Sonata car model, and those tweets formed the only topic and category that was not drug related.</p>
          <p>For the street-name corpus, the BERTopic model with the highest statistical coherence score produced 40 topics, many of which overlapped and were not necessarily related to drug use. Only 32.11% (54,788/170,618) of all tweets were sorted into topics that pertained primarily to drug use, allowing the inference that most posts pertained to nongermane topics. Observationally, this was because many street names for drugs can refer to a variety of real-world concepts or phenomena (eg, words do not necessarily refer to a drug without additional context). Previous research supports the idea that machine-based NLP approaches may struggle to parse content containing street names for drugs effectively [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref64">64</xref>]. In the street-name corpus, 6 of the 8 clusters were sorted around terms unrelated to drug use. Out of these 6 clusters for the street-name corpus, 4 (67%) clusters (60,592/170,618, 35.51% of all posts) contained themes relating to football, fandoms, firearms, and the stock market. The last 2 clusters were even more difficult to categorize: we could only find general themes relating to the word “Skippy” (sometimes used colloquially to refer to stimulants) for one, and the other did not appear to us (as human interpreters) to have a core theme, although the NLP approach had a computational reason for generating the topics and cluster.</p>
          <p>Comparing the topics in the 2 corpora, 11 (28%) out of 40 topics in the street-name corpus contained &lt;1% of all posts, whereas the brand-name corpus had only 5 topics total. The street-name corpus contained many niche discussion topics compared with the few general themes of the brand-name corpus. On the basis of our findings from the BERTopic output, we suspect that refining a complex data set of this size by eliminating content that is not drug specific would be arduous. However, in moving from LDA [<xref ref-type="bibr" rid="ref38">38</xref>] to BERTopic (a more refined algorithm), we were better able to identify pockets of conversation that were not drug specific and were better able to tag them appropriately. Future research should consider additional work in data refining and classifier building with the street-specific data set.</p>
        </sec>
        <sec>
          <title>Sentiment Analysis and Predictive Modeling</title>
          <p>We used VADER to assess the sentiments of tweets and found that both corpora contained tweets with a wide range of sentiments. Interestingly, we found that the street-name corpus had a larger proportion of positively labeled tweets compared with the brand-name corpus. In our study, the terminology categorization for street-drug terms was complex, which may raise questions as to VADER’s applicability. However, VADER’s original validation study was particularly successful at classifying tweets or microblog text (vs other forms of text), outperforming even human raters, and the dictionary of lexical features was designed, in principle, to be domain agnostic [<xref ref-type="bibr" rid="ref41">41</xref>]. This increases our confidence in the VADER-based assessment of the data. We hypothesize that the street-name corpus was made up of many topics that are unrelated to drug use. Therefore, we suspect that many positive tweets were support from fans, such as fans of Nicki Minaj (barbs) and the Minnesota Vikings (Vikes). However, this analysis pipeline was not able to directly link words and sentiment, so we cannot be sure whether that was the case.</p>
          <p>Since the language features associated with emotionality were based on the VADER lexicon, we can know <italic>what</italic> kinds of things were scored as positive but not <italic>why</italic> those features were used to express a certain sentiment. Understanding the motivations behind positive communication is an important next step in understanding how individuals feel about drug use at scale. Arguendo, it might be the case that lexical features (eg, words, capitalization, context, punctuation, etc) associated with positive sentiment occur more often in drug discourse during events (eg, concerts) than drug discourse referring to isolated or solo use. To truly understand why individuals feel a certain way about different types of drug use would require additional deep qualitative methods and analysis. We used multinomial logistic regression to understand if we could predict the sentiment label or emotionality of a tweet using information about the tweet’s topic and how engaged users are with the tweet. We tested permutations of regression models that either (1) included tweet text as a covariate or (2) did not include it. We found that the models including the tweet text as a covariate explained more variation in tweet sentiment (by approximately 60% according to the macro <italic>F</italic><sub>1</sub>-score) than the models that did not incorporate text as a feature. This result was consistent across both corpora, showing that the generated topic ID and engagement metrics were not sufficient to predict the sentiment of a given tweet. Given the variables to which we had access, the only way to accurately predict tweet sentiment was to use the language itself. This means that aspects <italic>about</italic> a tweet, such as what it discusses (its topic ID) and how engaged people are with a tweet (number of likes, replies, and retweets), cannot be used to accurately predict the emotionality of a given tweet. 
This speaks to the diversity of opinions within a topic and how difficult it is to understand the sentiment of a tweet without knowing the full context within a post. Without the full context, we cannot predict whether a tweet about drug use will have positive or negative sentiment, even if we know what drug is being discussed and how well engaged people are with a post.</p>
        </sec>
        <sec>
          <title>Comparison to Previous Literature</title>
          <p>In the peer review for a paper on our previous LDA model (Parker et al [<xref ref-type="bibr" rid="ref38">38</xref>]), reviewers suggested that an appropriate next step would be the use of neural network modeling, which we performed here. The results of the BERTopic model support the conclusions from the LDA model. Specifically, the brand-name corpus was more easily categorized by a machine-based approach than the street-name corpus. As before, this difference seemed attributable to the fact that many of the words in the street corpus do not have a clear meaning outside of a narrow context. For instance, the word “Skippy” can refer to methylphenidate (eg, Ritalin), a brand of peanut butter, or a Canadian politician. In contrast, “fentanyl” has an unambiguous meaning even without context.</p>
          <p>The most obvious difference between the models is the number of topics generated. In our prior work, the LDA model generated 20 topics for the brand-name data set, while in this paper, the optimal BERTopic model was able to use 5 topics to cluster all posts. In contrast, the harder-to-parse street-name corpus resulted in more similar numbers of topics for LDA and BERTopic (35 and 40, respectively). The BERTopic analysis could more clearly delineate the different topics of discussion based on word context, allowing for an increased number of topics for the street-name data set and fewer topics in the brand-name corpus since discussion in the brand-name corpus is more homogeneous and easily categorizable. The BERTopic model generated more cohesive themes than the LDA model due to pretrained BERT embeddings, which accurately captured the semantic relationships between words; thus, words with multiple meanings are better understood and categorized. In contrast, LDA uses word co-occurrence to generate topics for tweets, so LDA topic models might group documents that share a word into the same topic even though the word is used in different contexts. As an example, “Adderall” can co-occur alongside other words like “anxiety” and “Ritalin.” In Parker et al [<xref ref-type="bibr" rid="ref38">38</xref>], the LDA model created 4 separate topics relating to Adderall use and 1 topic relating to the Adderall shortage. However, as we see from our BERTopic model, the more sophisticated algorithm was able to condense those same 4 topics into 1 topic relating to Adderall use, while discussion about the shortage was grouped into the topic relating to the intersection between politics and drug use.</p>
          <p>Previous work by Nasralah et al [<xref ref-type="bibr" rid="ref65">65</xref>] used LDA to better understand the most-discussed topics relating to the opioid epidemic by analyzing 503,830 tweets and filtering tweets via an evaluation matrix. Similar work [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref67">67</xref>] analyzing people’s reactions to the opioid epidemic has been conducted using textual analysis algorithms to find themes in X (Twitter) data. A study by Tassone et al [<xref ref-type="bibr" rid="ref68">68</xref>] used convolutional neural networks and other deep learning techniques to classify whether tweets about drug use were encouraging drug use (positive) or discouraging drug use (negative) and created synthetic tweets on drug use based on real tweets about drugs. While that approach also incorporated sentiment, our definition of a positive or negative tweet was dependent on the VADER classification instead of defining based on whether a tweet encourages or discourages drug use. In addition, we used a semisupervised technique (BERTopic) to classify tweets into general themes. Many studies [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref69">69</xref>,<xref ref-type="bibr" rid="ref70">70</xref>] that identify themes for a collection of tweets pertaining to drug use using manual annotation methods, including inductive and deductive qualitative coding, have also been conducted. In 2022, Al-Garadi et al [<xref ref-type="bibr" rid="ref71">71</xref>] used LDA and VADER scores to understand the different reasons for nonmedical prescription drug use. Cavazos-Rehg et al [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] focused on a single drug, marijuana, and how young people discuss marijuana use and react to popular accounts that discuss marijuana use. 
Both studies from Cavazos-Rehg et al [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] assessed sentiment using Twitter, but instead of analyzing sentiment using VADER, they used a crowd-sourcing service to code the sentiment of the tweets. In contrast, we used a classifier model across a wide variety of prescription drug conversations on Twitter rather than using human coders.</p>
        </sec>
        <sec>
          <title>Strengths and Limitations</title>
          <p>Our study’s strength lies in the cohesive topics generated by BERTopic, which enabled a clear understanding of the general themes of discussion in the street- and brand-name corpora. However, there are some limitations to our study. First, we cannot distinguish <italic>why</italic> the amount of positive sentiment differed between the brand and street tweets. The VADER analysis that we performed was descriptive in nature, and although we found the sentiment label and compound score of each tweet, we could not summarize why X (Twitter) users expressed positive or negative sentiments about a drug. Some form of stance detection would have to be conducted to better understand how different users feel about specific drugs. From the VADER scores, we can only identify aggregate trends regarding sentiment and not make conclusions about how individuals feel about specific types of drugs.</p>
          <p>In our text-comprehensive regression models, we classified the sentiment labels of tweets with a macro <italic>F</italic><sub>1</sub>-score of 82.8% in the brand-name corpus and 84.7% in the street-name corpus. Our modeling shows that sentiment labels can best be predicted using the cleaned text of a tweet as part of the feature set including engagement metrics and topic ID. However, without the text of a post, the <italic>F</italic><sub>1</sub>-score fell to 23.1% in the brand-name corpus and to 21.4% in the street-name corpus. This points to a limitation of topic modeling: it is primarily an exploratory form of analysis that cannot tell us about the emotionality of a data set. Topic models can help researchers find the general ways in which people are discussing a topic, but these topics can neither be used to predict the sentiment within the topic nor, more obviously, allow deeper inferences about motivations and intentions.</p>
          <p>We were also limited by VADER, which is a lexicon-based sentiment analysis tool. Although the use of VADER is widely supported in the literature, there are concerns that VADER scores could be biased due to the overrepresentation or absence of certain words in the lexicon. In our case, certain slang terms for prescription drugs, such as “perc” or “fent,” as well as certain prescription drug names like Adderall or Ritalin, are not present in the VADER lexicon. For our work, we were more interested in the <italic>context</italic> around certain prescription drug names and slang terms. We wanted to understand the emotional affect around certain terms, not necessarily the affect of the term itself. In future work, the VADER lexicon could be expanded to include slang terms in addition to prescription drug names.</p>
          <p>One final limitation is the lack of generalizability in our study. Since we collected our data, Twitter has been rebranded to X, and the number of active users, the way that users interact with the site, and the algorithm used to show content to users have all changed. We are not able to replicate our study since acquiring the volume of data that was available in the past is not feasible. The “infoveillance” component of our analysis is also called into question since geotagging is no longer available. The future of this type of research must be found on other social networking platforms, such as Facebook, Instagram, and BlueSky, which offer first-party APIs to track their data, and through platforms like PushShift, which is a third-party API for Reddit data.</p>
        </sec>
        <sec>
          <title>Implications</title>
          <p>Our findings broadly illustrate the importance of using more advanced computational approaches to mine social media data for conversations mentioning prescription drugs. In this section, we offer some practical implications of our study, including the importance of a refined data set for classifier construction and the need for more advanced sentiment analysis tools.</p>
          <p>Our BERTopic model classified the street- and brand-name corpora into a coherent set of individual topics, leading to a higher number of topic clusters in the street-name data set and fewer (only 5) topics for the brand-name data set. By leveraging BERTopic and regression models, we were able to further refine our data set, capturing more nuanced topic meaning to create a future classifier pertaining to web-based communication about drug use. More importantly, we were able to further isolate extraneous content (ie, tweets about cars, fanbases, and sports teams), which, theoretically, would impede the ability to train an accurate classifier. We have taken the first steps to build this classifier by identifying extraneous content. The next step would be to begin a manual annotation process of the refined data set using qualitative expertise to “tag” our data and begin a test-retest approach with training and validation data.</p>
          <p>Using VADER, we identified tweets as having positive, negative, or neutral sentiments. Then, we compared the percentages of positive, negative, and neutral tweets between the 2 corpora. This type of analysis allows us to characterize the sentiment in aggregate for the brand and street corpora. To further understand the sentiment that users on X (Twitter) have toward certain drugs, we need to perform more text filtering to find what specific words and phrases are used with certain drug-related words. The next steps include conducting an analysis to identify the lexicon surrounding the street and brand names of prescription drugs to form a better understanding of how certain drugs are discussed. With a more refined data set enhanced by qualitative coding, we may begin to build a training data set of social media conversations about illicit drug use that could be useful for designing health communication interventions.</p>
        </sec>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This work has shown how data from X (Twitter) can be used to identify topical trends surrounding both informal and formal discussions of drug use among users on the platform. Our work combines topic modeling and sentiment analysis to give greater detail on how users on X (Twitter) feel about different types of prescription drugs. Consistent with Parker et al [<xref ref-type="bibr" rid="ref38">38</xref>], we found that colloquialisms used in the street-name corpus disguise how people discuss drug use. The improved clustering offered by BERTopic allowed us to identify cohesive themes in the street- and brand-name corpora. The clear themes shown in the brand-name corpus contrast with the difficulties in parsing how individuals discuss street-name prescription drug use. From our literature review, we could not find many other works that captured the difficulties in trying to understand how individuals discuss street-name drug use. This points to a potential gap in the drug-discussion literature on how to analyze drugs when their street names are used. Furthermore, VADER analysis detected more positive sentiment among discussions in the street-name corpus compared with the brand-name corpus. Regression analysis of this classifier model determined that predicting the sentiment of drug use discussion is difficult without the full discussion context; topic and engagement metrics alone were insufficient to predict the sentiment of a street- or brand-name tweet.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">TF-IDF</term>
          <def>
            <p>term-frequency–inverse document frequency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">VADER</term>
          <def>
            <p>Valence Aware Dictionary and Sentiment Reasoner</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and analyzed during this study are available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeung</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The 'digital town square' problem</article-title>
          <source>RAND Corporation</source>
          <year>2023</year>
          <month>01</month>
          <day>13</day>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rand.org/blog/2023/01/the-digital-town-square-problem.html">https://www.rand.org/blog/2023/01/the-digital-town-square-problem.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinnenberg</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Buttenheim</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Padrez</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mancheno</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Twitter as a tool for health research: a systematic review</article-title>
          <source>Am J Public Health</source>
          <year>2017</year>
          <month>01</month>
          <volume>107</volume>
          <issue>1</issue>
          <fpage>e1</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2016.303512</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aiello</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Renson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zivich</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Social media- and internet-based disease surveillance for public health</article-title>
          <source>Annu Rev Public Health</source>
          <year>2020</year>
          <month>04</month>
          <day>02</day>
          <volume>41</volume>
          <issue>1</issue>
          <fpage>101</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.annualreviews.org/content/journals/10.1146/annurev-publhealth-040119-094402?crawler=true&amp;mimetype=application/pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-040119-094402</pub-id>
          <pub-id pub-id-type="medline">31905322</pub-id>
          <pub-id pub-id-type="pmcid">PMC7959655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>National and local influenza surveillance through Twitter: an analysis of the 2012-2013 influenza epidemic</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <month>12</month>
          <day>9</day>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e83672</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0083672"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0083672</pub-id>
          <pub-id pub-id-type="medline">24349542</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-35058</pub-id>
          <pub-id pub-id-type="pmcid">PMC3857320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guntuku</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Buttenheim</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Sherman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Twitter discourse reveals geographical and temporal variation in concerns about COVID-19 vaccines in the United States</article-title>
          <source>Vaccine</source>
          <year>2021</year>
          <month>07</month>
          <day>05</day>
          <volume>39</volume>
          <issue>30</issue>
          <fpage>4034</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34140171"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2021.06.014</pub-id>
          <pub-id pub-id-type="medline">34140171</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(21)00738-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8188387</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Agrawal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Choudhary</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Real-time disease surveillance using Twitter data: demonstration on flu and cancer</article-title>
          <source>Proceedings of the 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2013</year>
          <conf-name>KDD '13</conf-name>
          <conf-date>August 11-14, 2013</conf-date>
          <conf-loc>Chicago, IL</conf-loc>
          <fpage>1474</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/2487575.2487709"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2487575.2487709</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guntuku</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Sherman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stokes</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Seltzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
          </person-group>
          <article-title>Tracking mental health and symptom mentions on Twitter during COVID-19</article-title>
          <source>J Gen Intern Med</source>
          <year>2020</year>
          <month>09</month>
          <day>07</day>
          <volume>35</volume>
          <issue>9</issue>
          <fpage>2798</fpage>
          <lpage>800</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32638321"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-020-05988-8</pub-id>
          <pub-id pub-id-type="medline">32638321</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-020-05988-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC7340749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fredricks</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Woc-Colburn</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bottazzi</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Weatherhead</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Disproportionate impact of the COVID-19 pandemic on immigrant communities in the United States</article-title>
          <source>PLoS Negl Trop Dis</source>
          <year>2020</year>
          <month>07</month>
          <day>13</day>
          <volume>14</volume>
          <issue>7</issue>
          <fpage>e0008484</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pntd.0008484"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pntd.0008484</pub-id>
          <pub-id pub-id-type="medline">32658925</pub-id>
          <pub-id pub-id-type="pii">PNTD-D-20-00730</pub-id>
          <pub-id pub-id-type="pmcid">PMC7357736</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anwar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ilyas</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yaqub</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Zaman</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Analyzing QAnon on Twitter in context of US elections 2020: analysis of user messages and profiles using VADER and BERT topic modeling</article-title>
          <source>Proceedings of the 22nd Annual International Conference on Digital Government Research</source>
          <year>2021</year>
          <conf-name>dg.o '21</conf-name>
          <conf-date>June 9-11, 2021</conf-date>
          <conf-loc>Omaha, NE</conf-loc>
          <fpage>82</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tinyurl.com/2a6b4h6r"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3463677.3463718</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Goodson</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Neutral or framed? A sentiment analysis of 2019 abortion laws</article-title>
          <source>Sex Res Social Policy</source>
          <year>2022</year>
          <month>01</month>
          <day>18</day>
          <volume>19</volume>
          <issue>3</issue>
          <fpage>936</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35069923"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13178-022-00690-2</pub-id>
          <pub-id pub-id-type="medline">35069923</pub-id>
          <pub-id pub-id-type="pii">690</pub-id>
          <pub-id pub-id-type="pmcid">PMC8764246</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weeg</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Arango</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter to measure public discussion of diseases: a case study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2015</year>
          <month>06</month>
          <day>26</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>e6</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2015/1/e6/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.3953</pub-id>
          <pub-id pub-id-type="medline">26925459</pub-id>
          <pub-id pub-id-type="pii">v1i1e6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4763717</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mangachena</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Pickering</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Implications of social media discourse for managing national parks in South Africa</article-title>
          <source>J Environ Manage</source>
          <year>2021</year>
          <month>05</month>
          <day>01</day>
          <volume>285</volume>
          <fpage>112159</fpage>
          <pub-id pub-id-type="doi">10.1016/j.jenvman.2021.112159</pub-id>
          <pub-id pub-id-type="medline">33631485</pub-id>
          <pub-id pub-id-type="pii">S0301-4797(21)00221-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lynn</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rosati</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Leoni Santos</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Endo</surname>
              <given-names>PT</given-names>
            </name>
          </person-group>
          <article-title>Sorting the healthy diet signal from the social media expert noise: preliminary evidence from the healthy diet discourse on Twitter</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2020</year>
          <month>11</month>
          <day>18</day>
          <volume>17</volume>
          <issue>22</issue>
          <fpage>8557</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph17228557"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph17228557</pub-id>
          <pub-id pub-id-type="medline">33218105</pub-id>
          <pub-id pub-id-type="pii">ijerph17228557</pub-id>
          <pub-id pub-id-type="pmcid">PMC7698912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Makita</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mas-Bleda</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Thelwall</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Mental health discourses on Twitter during mental health awareness week</article-title>
          <source>Issues Ment Health Nurs</source>
          <year>2021</year>
          <month>05</month>
          <day>14</day>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>437</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1080/01612840.2020.1814914</pub-id>
          <pub-id pub-id-type="medline">32926796</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>Social media use by age</article-title>
          <source>Pew Research Center</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/chart/social-media-use-by-age/">https://www.pewresearch.org/internet/chart/social-media-use-by-age/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Atske</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Social media use in 2021</article-title>
          <source>Pew Research Center</source>
          <year>2021</year>
          <access-date>2024-04-04</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2021/04/07/social-media-use-in-2021/">https://www.pewresearch.org/internet/2021/04/07/social-media-use-in-2021/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wojcik</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Sizing up Twitter users</article-title>
          <source>Pew Research Center</source>
          <year>2019</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2019/04/24/sizing-up-twitter-users/">https://www.pewresearch.org/internet/2019/04/24/sizing-up-twitter-users/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vogels</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Gelles-Watnick</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Massarat</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Teens, social media and technology</article-title>
          <source>Pew Research Center</source>
          <year>2022</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2022/08/10/teens-social-media-and-technology-2022/">https://www.pewresearch.org/internet/2022/08/10/teens-social-media-and-technology-2022/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vogels</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Perrin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rainie</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Connection, creativity and drama: teen life on social media in 2022</article-title>
          <source>Pew Research Center</source>
          <year>2022</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2022/11/16/connection-creativity-and-drama-teen-life-on-social-media-in-2022/">https://www.pewresearch.org/internet/2022/11/16/connection-creativity-and-drama-teen-life-on-social-media-in-2022/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>U.S. overdose deaths decrease in 2023, first time since 2018</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2024</year>
          <month>05</month>
          <day>15</day>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/nchs/pressroom/nchs_press_releases/2024/20240515.htm">https://www.cdc.gov/nchs/pressroom/nchs_press_releases/2024/20240515.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Behrends</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Expanding access to naloxone: a review of distribution strategies</article-title>
          <source>Health Policy and Services Research</source>
          <year>2019</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://digirepo.nlm.nih.gov/catalog/nlm:nlmuid-101750194-pdf">https://digirepo.nlm.nih.gov/catalog/nlm:nlmuid-101750194-pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Penm</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>MacKinnon</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Boone</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Ciaccia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McNamee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Winstanley</surname>
              <given-names>EL</given-names>
            </name>
          </person-group>
          <article-title>Strategies and policies to address the opioid epidemic: a case study of Ohio</article-title>
          <source>J Am Pharm Assoc (2003)</source>
          <year>2017</year>
          <month>03</month>
          <volume>57</volume>
          <issue>2S</issue>
          <fpage>S148</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1544-3191(17)30001-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.japh.2017.01.001</pub-id>
          <pub-id pub-id-type="medline">28189539</pub-id>
          <pub-id pub-id-type="pii">S1544-3191(17)30001-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5497298</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Dunn</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Huhn</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Harm reduction for opioid use disorder: strategies and outcome metrics</article-title>
          <source>Am J Psychiatry</source>
          <year>2024</year>
          <month>05</month>
          <day>01</day>
          <volume>181</volume>
          <issue>5</issue>
          <fpage>372</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1176/appi.ajp.20230918</pub-id>
          <pub-id pub-id-type="medline">38706335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <article-title>National survey on drug use and health: summary of methodological studies, 1971–2014</article-title>
          <source>Substance Abuse and Mental Health Services Administration</source>
          <year>2014</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK519735/">https://www.ncbi.nlm.nih.gov/books/NBK519735/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cormier</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cushman</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Innovation via social media - the importance of Twitter to science</article-title>
          <source>Res Pract Thromb Haemost</source>
          <year>2021</year>
          <month>03</month>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>373</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2475-0379(22)01344-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/rth2.12493</pub-id>
          <pub-id pub-id-type="medline">33870022</pub-id>
          <pub-id pub-id-type="pii">S2475-0379(22)01344-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC8035792</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Topaloglu</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Towards large-scale Twitter mining for drug-related adverse events</article-title>
          <source>Proceedings of the 2012 ACM International Workshop on Smart Health and Wellbeing</source>
          <year>2012</year>
          <month>10</month>
          <day>29</day>
          <conf-name>SHB'12</conf-name>
          <conf-date>October 29, 2012</conf-date>
          <conf-loc>Maui, HI</conf-loc>
          <fpage>25</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28967001"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2389707.2389713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Masino</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Forsyth</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Fiks</surname>
              <given-names>AG</given-names>
            </name>
          </person-group>
          <article-title>Detecting adverse drug reactions on Twitter with convolutional neural networks and word embedding features</article-title>
          <source>J Healthc Inform Res</source>
          <year>2018</year>
          <month>06</month>
          <day>12</day>
          <volume>2</volume>
          <issue>1-2</issue>
          <fpage>25</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35415401"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s41666-018-0018-9</pub-id>
          <pub-id pub-id-type="medline">35415401</pub-id>
          <pub-id pub-id-type="pii">18</pub-id>
          <pub-id pub-id-type="pmcid">PMC8982795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moh</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>On adverse drug event extractions using Twitter sentiment analysis</article-title>
          <source>Netw Model Anal Health Inform Bioinforma</source>
          <year>2017</year>
          <month>09</month>
          <day>18</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>18</fpage>
          <pub-id pub-id-type="doi">10.1007/s13721-017-0159-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ginn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Scotch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Malone</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Social media mining for toxicovigilance: automatic monitoring of prescription medication abuse from Twitter</article-title>
          <source>Drug Saf</source>
          <year>2016</year>
          <month>03</month>
          <day>09</day>
          <volume>39</volume>
          <issue>3</issue>
          <fpage>231</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26748505"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40264-015-0379-4</pub-id>
          <pub-id pub-id-type="medline">26748505</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-015-0379-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4749656</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Towards automating location-specific opioid toxicosurveillance from Twitter via data science methods</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2019</year>
          <month>08</month>
          <day>21</day>
          <volume>264</volume>
          <fpage>333</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31437940"/>
          </comment>
          <pub-id pub-id-type="doi">10.3233/SHTI190238</pub-id>
          <pub-id pub-id-type="medline">31437940</pub-id>
          <pub-id pub-id-type="pii">SHTI190238</pub-id>
          <pub-id pub-id-type="pmcid">PMC6774610</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chun</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Bhole</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Geller</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Enabling real-time drug abuse detection in Tweets</article-title>
          <source>Proceedings of the 2017 IEEE 33rd International Conference on Data Engineering</source>
          <year>2017</year>
          <conf-name>ICDE '17</conf-name>
          <conf-date>April 19-22, 2017</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <fpage>1510</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/7930118"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/icde.2017.221</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tofighi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Aphinyanaphongs</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Marini</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemlou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nayebvali</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Metzger</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Raghunath</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Detecting illicit opioid content on Twitter</article-title>
          <source>Drug Alcohol Rev</source>
          <year>2020</year>
          <month>03</month>
          <day>22</day>
          <volume>39</volume>
          <issue>3</issue>
          <fpage>205</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32202005"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/dar.13048</pub-id>
          <pub-id pub-id-type="medline">32202005</pub-id>
          <pub-id pub-id-type="pmcid">PMC8276110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>The canary in the coal mine tweets: social media reveals public perceptions of non-medical use of opioids</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <month>08</month>
          <day>07</day>
          <volume>10</volume>
          <issue>8</issue>
          <fpage>e0135072</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0135072"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0135072</pub-id>
          <pub-id pub-id-type="medline">26252774</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-56862</pub-id>
          <pub-id pub-id-type="pmcid">PMC4529203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Salyer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grucza</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Bierut</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>Twitter chatter about marijuana</article-title>
          <source>J Adolesc Health</source>
          <year>2015</year>
          <month>02</month>
          <volume>56</volume>
          <issue>2</issue>
          <fpage>139</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25620299"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jadohealth.2014.10.270</pub-id>
          <pub-id pub-id-type="medline">25620299</pub-id>
          <pub-id pub-id-type="pii">S1054-139X(14)00703-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4306811</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grucza</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bierut</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Characterizing the followers and tweets of a marijuana-focused Twitter handle</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>06</month>
          <day>27</day>
          <volume>16</volume>
          <issue>6</issue>
          <fpage>e157</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/6/e157/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3247</pub-id>
          <pub-id pub-id-type="medline">24974893</pub-id>
          <pub-id pub-id-type="pii">v16i6e157</pub-id>
          <pub-id pub-id-type="pmcid">PMC4090385</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Brawner</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Kranzler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Giorgi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Abera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Exploring substance use tweets of youth in the United States: mixed methods study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>03</month>
          <day>26</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e16191</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/1/e16191/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16191</pub-id>
          <pub-id pub-id-type="medline">32213472</pub-id>
          <pub-id pub-id-type="pii">v6i1e16191</pub-id>
          <pub-id pub-id-type="pmcid">PMC7146240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Kath</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>QC</given-names>
            </name>
          </person-group>
          <article-title>National substance use patterns on Twitter</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <month>11</month>
          <day>6</day>
          <volume>12</volume>
          <issue>11</issue>
          <fpage>e0187691</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0187691"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0187691</pub-id>
          <pub-id pub-id-type="medline">29107961</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-20338</pub-id>
          <pub-id pub-id-type="pmcid">PMC5673183</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Parker</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Eddens</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Agley</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Results and methodological implications of the digital epidemiology of prescription drug references among Twitter users: latent Dirichlet allocation (LDA) analyses</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>07</month>
          <day>28</day>
          <volume>25</volume>
          <fpage>e48405</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e48405/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/48405</pub-id>
          <pub-id pub-id-type="medline">37505795</pub-id>
          <pub-id pub-id-type="pii">v25i1e48405</pub-id>
          <pub-id pub-id-type="pmcid">PMC10422173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Katsuki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
            <name name-style="western">
              <surname>Cuomo</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Establishing a link between prescription drug abuse and illicit online pharmacies: analysis of Twitter data</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>12</month>
          <day>16</day>
          <volume>17</volume>
          <issue>12</issue>
          <fpage>e280</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/12/e280/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5144</pub-id>
          <pub-id pub-id-type="medline">26677966</pub-id>
          <pub-id pub-id-type="pii">v17i12e280</pub-id>
          <pub-id pub-id-type="pmcid">PMC4704982</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Tshimula</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Dubé</surname>
              <given-names>È</given-names>
            </name>
            <name name-style="western">
              <surname>Graham</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Greyson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>MacDonald</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Unmasking the Twitter discourses on masks during the COVID-19 pandemic: user cluster-based BERT topic modeling approach</article-title>
          <source>JMIR Infodemiology</source>
          <year>2022</year>
          <month>12</month>
          <day>9</day>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e41198</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://infodemiology.jmir.org/2022/2/e41198/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/41198</pub-id>
          <pub-id pub-id-type="medline">36536763</pub-id>
          <pub-id pub-id-type="pii">v2i2e41198</pub-id>
          <pub-id pub-id-type="pmcid">PMC9749113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>VADER: a parsimonious rule-based model for sentiment analysis of social media text</article-title>
          <source>Proc Int AAAI Conf Weblogs Soc Media</source>
          <year>2014</year>
          <month>05</month>
          <day>16</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>216</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ojs.aaai.org/index.php/icwsm/article/view/14550"/>
          </comment>
          <pub-id pub-id-type="doi">10.1609/icwsm.v8i1.14550</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grootendorst</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>BERTopic: neural topic modeling with a class-based TF-IDF procedure</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on March 11, 2022</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.48550/arXiv.2203.05794"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O’Callaghan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Greene</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Carthy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cunningham</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>An analysis of the coherence of descriptors in topic modeling</article-title>
          <source>Expert Systems with Applications</source>
          <year>2015</year>
          <month>08</month>
          <volume>42</volume>
          <issue>13</issue>
          <fpage>5645</fpage>
          <lpage>57</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2015.02.055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <article-title>About the scoring</article-title>
          <source>VaderSentiment</source>
          <year>2021</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://vadersentiment.readthedocs.io/en/latest/pages/about_the_scoring.html">https://vadersentiment.readthedocs.io/en/latest/pages/about_the_scoring.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bathina</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Thij</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rutter</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Bollen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Declining well-being during the COVID-19 pandemic reveals US social inequities</article-title>
          <source>PLoS One</source>
          <year>2021</year>
          <month>7</month>
          <day>8</day>
          <volume>16</volume>
          <issue>7</issue>
          <fpage>e0254114</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0254114"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0254114</pub-id>
          <pub-id pub-id-type="medline">34237087</pub-id>
          <pub-id pub-id-type="pii">PONE-D-21-07348</pub-id>
          <pub-id pub-id-type="pmcid">PMC8266050</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Thij</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bathina</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rutter</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Bollen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Social media insights into US mental health during the COVID-19 pandemic: longitudinal analysis of Twitter data</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>12</month>
          <day>14</day>
          <volume>22</volume>
          <issue>12</issue>
          <fpage>e21418</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/12/e21418/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21418</pub-id>
          <pub-id pub-id-type="medline">33284783</pub-id>
          <pub-id pub-id-type="pii">v22i12e21418</pub-id>
          <pub-id pub-id-type="pmcid">PMC7744146</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gibbons</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Malouf</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Spitzberg</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Appleyard</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nara</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tsou</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Twitter-based measures of neighborhood sentiment as predictors of residential population health</article-title>
          <source>PLoS One</source>
          <year>2019</year>
          <month>7</month>
          <day>11</day>
          <volume>14</volume>
          <issue>7</issue>
          <fpage>e0219550</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0219550"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0219550</pub-id>
          <pub-id pub-id-type="medline">31295294</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-32339</pub-id>
          <pub-id pub-id-type="pmcid">PMC6622529</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Patterson</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Computational analyses identify addiction help-seeking behaviors on the social networking website Reddit: insights into online social interactions and addiction support communities</article-title>
          <source>PLOS Digit Health</source>
          <year>2022</year>
          <month>11</month>
          <day>9</day>
          <volume>1</volume>
          <issue>11</issue>
          <fpage>e0000143</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812569"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000143</pub-id>
          <pub-id pub-id-type="medline">36812569</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-22-00118</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931264</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rathje</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Van Bavel</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>van der Linden</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Out-group animosity drives engagement on social media</article-title>
          <source>Proc Natl Acad Sci USA</source>
          <year>2021</year>
          <month>06</month>
          <day>29</day>
          <volume>118</volume>
          <issue>26</issue>
          <fpage>e2024292118</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pnas.org/doi/abs/10.1073/pnas.2024292118?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.2024292118</pub-id>
          <pub-id pub-id-type="medline">34162706</pub-id>
          <pub-id pub-id-type="pii">2024292118</pub-id>
          <pub-id pub-id-type="pmcid">PMC8256037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Arousal increases social transmission of information</article-title>
          <source>Psychol Sci</source>
          <year>2011</year>
          <month>07</month>
          <volume>22</volume>
          <issue>7</issue>
          <fpage>891</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/full/10.1177/0956797611413294"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0956797611413294</pub-id>
          <pub-id pub-id-type="medline">21690315</pub-id>
          <pub-id pub-id-type="pii">0956797611413294</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sekimoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Seki</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshida</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Umemura</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>The metrics of keywords to understand the difference between retweet and like in each category</article-title>
          <source>Proceedings of the 2020 IEEE/WIC/ACM International Joint Conference on Web Intelligence and Intelligent Agent Technology</source>
          <year>2020</year>
          <conf-name>WI-IAT '20</conf-name>
          <conf-date>December 14-20, 2020</conf-date>
          <conf-loc>Melbourne, Australia</conf-loc>
          <fpage>560</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.computer.org/csdl/proceedings-article/wi-iat/2020/192400a560/1uHhvviLp6w"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/wiiat50758.2020.00084</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brady</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wills</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Jost</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Van Bavel</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>Emotion shapes the diffusion of moralized content in social networks</article-title>
          <source>Proc Natl Acad Sci USA</source>
          <year>2017</year>
          <month>07</month>
          <day>11</day>
          <volume>114</volume>
          <issue>28</issue>
          <fpage>7313</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28652356"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1618923114</pub-id>
          <pub-id pub-id-type="medline">28652356</pub-id>
          <pub-id pub-id-type="pii">1618923114</pub-id>
          <pub-id pub-id-type="pmcid">PMC5514704</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tsugawa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ohsaki</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>On the relation between message sentiment and its virality on social media</article-title>
          <source>Soc Netw Anal Min</source>
          <year>2017</year>
          <month>5</month>
          <day>11</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <pub-id pub-id-type="doi">10.1007/s13278-017-0439-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Crockett</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Moral outrage in the digital age</article-title>
          <source>Nat Hum Behav</source>
          <year>2017</year>
          <month>11</month>
          <day>18</day>
          <volume>1</volume>
          <issue>11</issue>
          <fpage>769</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1038/s41562-017-0213-3</pub-id>
          <pub-id pub-id-type="medline">31024117</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-017-0213-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grootendorst</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>BERTopic: c-TF-IDF</article-title>
          <source>GitHub</source>
          <year>2024</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://maartengr.github.io/BERTopic/getting_started/ctfidf/ctfidf.html">https://maartengr.github.io/BERTopic/getting_started/ctfidf/ctfidf.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Understanding inverse document frequency: on theoretical arguments for IDF</article-title>
          <source>J Doc</source>
          <year>2004</year>
          <month>10</month>
          <volume>60</volume>
          <issue>5</issue>
          <fpage>503</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1108/00220410410560582</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Delen</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Delen</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Performance evaluation for predictive modeling</article-title>
          <source>Advanced Data Mining Techniques</source>
          <year>2008</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>137</fpage>
          <lpage>47</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tarekegn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ricceri</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ferracin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Giacobini</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Predictive modeling for frailty conditions in elderly people: machine learning approaches</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>06</month>
          <day>04</day>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>e16678</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/2318/1741011"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16678</pub-id>
          <pub-id pub-id-type="medline">32442149</pub-id>
          <pub-id pub-id-type="pii">v8i6e16678</pub-id>
          <pub-id pub-id-type="pmcid">PMC7303829</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Schütze</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>Introduction to Information Retrieval</source>
          <year>2008</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>HY</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Comparison of artificial neural network and logistic regression models for predicting in-hospital mortality after primary liver cancer surgery</article-title>
          <source>PLoS One</source>
          <year>2012</year>
          <month>4</month>
          <day>26</day>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>e35781</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0035781"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0035781</pub-id>
          <pub-id pub-id-type="medline">22563399</pub-id>
          <pub-id pub-id-type="pii">PONE-D-12-00715</pub-id>
          <pub-id pub-id-type="pmcid">PMC3338531</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Su</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Long</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Toward optimal heparin dosing by comparing multiple machine learning methods: retrospective study</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>06</month>
          <day>22</day>
          <volume>8</volume>
          <issue>6</issue>
          <fpage>e17648</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/6/e17648/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17648</pub-id>
          <pub-id pub-id-type="medline">32568089</pub-id>
          <pub-id pub-id-type="pii">v8i6e17648</pub-id>
          <pub-id pub-id-type="pmcid">PMC7338927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forrest</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>43 things to know about Justin Trudeau’s new rival</article-title>
          <source>Politico</source>
          <year>2022</year>
          <month>09</month>
          <day>10</day>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.politico.com/news/2022/09/09/conservative-pierre-poilievre-in-his-own-words-00055343">https://www.politico.com/news/2022/09/09/conservative-pierre-poilievre-in-his-own-words-00055343</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="web">
          <article-title>FDA announces shortage of Adderall</article-title>
          <source>U.S. Food and Drug Administration</source>
          <year>2023</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/drugs/drug-safety-and-availability/fda-announces-shortage-adderall">https://www.fda.gov/drugs/drug-safety-and-availability/fda-announces-shortage-adderall</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adrover</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bodnar</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Telenti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Salathé</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Identifying adverse effects of HIV drug treatment and associated sentiments using Twitter</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2015</year>
          <month>07</month>
          <day>27</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>e7</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2015/2/e7/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.4488</pub-id>
          <pub-id pub-id-type="medline">27227141</pub-id>
          <pub-id pub-id-type="pii">v1i2e7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4869211</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nasralah</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>El-Gayar</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Social media text mining framework for drug abuse: development and validation study with an opioid crisis case analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>08</month>
          <day>13</day>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>e18350</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/8/e18350/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18350</pub-id>
          <pub-id pub-id-type="medline">32788147</pub-id>
          <pub-id pub-id-type="pii">v22i8e18350</pub-id>
          <pub-id pub-id-type="pmcid">PMC7446758</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Glowacki</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Glowacki</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Wilcox</surname>
              <given-names>GB</given-names>
            </name>
          </person-group>
          <article-title>A text-mining analysis of the public's reactions to the opioid crisis</article-title>
          <source>Subst Abus</source>
          <year>2018</year>
          <month>04</month>
          <day>01</day>
          <volume>39</volume>
          <issue>2</issue>
          <fpage>129</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1080/08897077.2017.1356795</pub-id>
          <pub-id pub-id-type="medline">28723265</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajesh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wilcox</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ring</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mackert</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Reactions to the opioid epidemic: a text-mining analysis of tweets</article-title>
          <source>J Addict Dis</source>
          <year>2021</year>
          <month>10</month>
          <day>26</day>
          <volume>39</volume>
          <issue>2</issue>
          <fpage>183</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1080/10550887.2020.1834812</pub-id>
          <pub-id pub-id-type="medline">33103608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tassone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Simpson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mendhe</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mago</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Choudhury</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Utilizing deep learning and graph mining to identify drug use on Twitter data</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <month>12</month>
          <day>30</day>
          <volume>20</volume>
          <issue>Suppl 11</issue>
          <fpage>304</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-01335-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-01335-3</pub-id>
          <pub-id pub-id-type="medline">33380324</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-01335-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7772918</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Black</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Margolin</surname>
              <given-names>ZR</given-names>
            </name>
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Dart</surname>
              <given-names>RC</given-names>
            </name>
          </person-group>
          <article-title>Online conversation monitoring to understand the opioid epidemic: epidemiological surveillance study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>06</month>
          <day>29</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e17073</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e17073/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17073</pub-id>
          <pub-id pub-id-type="medline">32597786</pub-id>
          <pub-id pub-id-type="pii">v6i2e17073</pub-id>
          <pub-id pub-id-type="pmcid">PMC7367521</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dekeseredy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sedney</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Razzaq</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Haggerty</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>HH</given-names>
            </name>
          </person-group>
          <article-title>Tweeting stigma: an exploration of Twitter discourse regarding medications used for both opioid use disorder and chronic pain</article-title>
          <source>J Drug Issues</source>
          <year>2021</year>
          <month>01</month>
          <day>12</day>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>340</fpage>
          <lpage>57</lpage>
          <pub-id pub-id-type="doi">10.1177/0022042620986508</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Garadi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Love</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Large-scale social media analysis reveals emotions associated with nonmedical prescription drug use</article-title>
          <source>Health Data Sci</source>
          <year>2022</year>
          <month>04</month>
          <day>27</day>
          <volume>2022</volume>
          <fpage>9851989</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://spj.science.org/doi/10.34133/2022/9851989?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.34133/2022/9851989</pub-id>
          <pub-id pub-id-type="medline">37621877</pub-id>
          <pub-id pub-id-type="pii">9851989</pub-id>
          <pub-id pub-id-type="pmcid">PMC10449547</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
