<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e48405</article-id>
      <article-id pub-id-type="pmid">37505795</article-id>
      <article-id pub-id-type="doi">10.2196/48405</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Results and Methodological Implications of the Digital Epidemiology of Prescription Drug References Among Twitter Users: Latent Dirichlet Allocation (LDA) Analyses</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Melton</surname>
            <given-names>Chad</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gupta</surname>
            <given-names>Ketan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Matsuda</surname>
            <given-names>Shinichi</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Parker</surname>
            <given-names>Maria A</given-names>
          </name>
          <degrees>MS, MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Epidemiology and Biostatistics</institution>
            <institution>School of Public Health</institution>
            <institution>Indiana University Bloomington</institution>
            <addr-line>809 E 9th St</addr-line>
            <addr-line>Bloomington, IN, 47405</addr-line>
            <country>United States</country>
            <phone>1 812 856 5950</phone>
            <email>map2@iu.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9763-1129</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Valdez</surname>
            <given-names>Danny</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2355-9881</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Rao</surname>
            <given-names>Varun K</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-7704-1974</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Eddens</surname>
            <given-names>Katherine S</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7783-4156</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Agley</surname>
            <given-names>Jon</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2345-8850</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Epidemiology and Biostatistics</institution>
        <institution>School of Public Health</institution>
        <institution>Indiana University Bloomington</institution>
        <addr-line>Bloomington, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Applied Health Science</institution>
        <institution>School of Public Health</institution>
        <institution>Indiana University Bloomington</institution>
        <addr-line>Bloomington, IN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Informatics</institution>
        <institution>Luddy School of Informatics, Computing, and Engineering</institution>
        <institution>Indiana University Bloomington</institution>
        <addr-line>Bloomington, IN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Maria A Parker <email>map2@iu.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>28</day>
        <month>7</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e48405</elocation-id>
      <history>
        <date date-type="received">
          <day>21</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>18</day>
          <month>5</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>1</day>
          <month>6</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>6</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Maria A Parker, Danny Valdez, Varun K Rao, Katherine S Eddens, Jon Agley. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 28.07.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e48405" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Social media is an important information source for a growing subset of the population and can likely be leveraged to provide insight into the evolving drug overdose epidemic. Twitter can provide valuable insight into trends, colloquial information available to potential users, and how networks and interactivity might influence what people are exposed to and how they engage in communication around drug use.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This exploratory study was designed to investigate the ways in which unsupervised machine learning analyses using natural language processing could identify coherent themes for tweets containing substance names.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study involved harnessing data from Twitter, including large-scale collection of brand name (N=262,607) and street name (N=204,068) prescription drug–related tweets and use of unsupervised machine learning analyses (ie, natural language processing) of collected data with data visualization to identify pertinent tweet themes. Latent Dirichlet allocation (LDA) with coherence score calculations was performed to compare brand (eg, OxyContin) and street (eg, oxys) name tweets.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found people discussed drug use differently depending on whether a brand name or street name was used. Brand name categories often contained political talking points (eg, border, crime, and political handling of ongoing drug mitigation strategies). In contrast, categories containing street names occasionally referenced drug misuse, though multiple social uses for a term (eg, Sonata) muddled topic clarity.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Content in the brand name corpus reflected discussion about the drug itself and less often reflected personal use. However, content in the street name corpus was notably more diverse and resisted simple LDA categorization. We speculate this may reflect effective use of slang terminology to clandestinely discuss drug-related activity. If so, straightforward analyses of digital drug-related communication may be more difficult than previously assumed. This work has the potential to be used for surveillance and detection of harmful drug use information. It also might be used for appropriate education and dissemination of information to persons engaged in drug use content on Twitter.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>Twitter</kwd>
        <kwd>LDA</kwd>
        <kwd>drug use</kwd>
        <kwd>digital epidemiology</kwd>
        <kwd>unsupervised analysis</kwd>
        <kwd>tweet</kwd>
        <kwd>tweets</kwd>
        <kwd>social media</kwd>
        <kwd>epidemiology</kwd>
        <kwd>epidemiological</kwd>
        <kwd>machine learning</kwd>
        <kwd>text mining</kwd>
        <kwd>data mining</kwd>
        <kwd>pharmacy</kwd>
        <kwd>pharmaceutic</kwd>
        <kwd>pharmaceutical</kwd>
        <kwd>pharmaceuticals</kwd>
        <kwd>drug</kwd>
        <kwd>prescription</kwd>
        <kwd>NLP</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The drug overdose epidemic has claimed more than 100,000 lives in 12-month year/year mortality reports for the past several years [<xref ref-type="bibr" rid="ref1">1</xref>]. The epidemic is constantly changing and has arguably done so at least 4 significant times (“waves”) since 2002 [<xref ref-type="bibr" rid="ref2">2</xref>]. The current (fourth) wave involves high mortality from stimulants and illicit fentanyl, both through unintentional ingestion of fentanyl (eg, as a contaminant) and from comorbid use with other drugs [<xref ref-type="bibr" rid="ref2">2</xref>]. However, in drug use research and in public perception, individual drugs and drug use disorders are often investigated in isolation (eg, a perception that the current epidemic remains an “opioid use” crisis), even though many individuals use drugs in combination or may not even intend to ingest an opioid prior to opioid overdose. These concerns are borne out in overdose death records. For example, an alarming increase in deaths involving methamphetamine and cocaine, with overdoses of both exceeding 10,000 in year/year estimates, illustrates that the current crisis may be more appropriately characterized as a polydrug overdose crisis [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      </sec>
      <sec>
        <title>Literature Review</title>
        <p>Social media is an important information source and communication tool for a growing subset of the population. The Pew Research Center [<xref ref-type="bibr" rid="ref4">4</xref>] estimates that 84% of people ages 18 years to 24 years use at least one social media site. Increasingly, the idea that we gather in a “virtual town square” [<xref ref-type="bibr" rid="ref5">5</xref>] is borne out in reality. It is therefore unsurprising that drug use—in a wide variety of forms and manners—is discussed on platforms like Twitter openly and without perceived judgement. In many ways, this reflects our nation’s long history and current interest in all things psychoactive [<xref ref-type="bibr" rid="ref6">6</xref>]. It also logically follows that social media is an emerging venue for observational research on drug use. For example, individuals participate in online communities, social relationships, and conversations about drug use (including transactional dialogue) on Twitter, which may be leveraged to provide insight into this evolving epidemic [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. Twitter (and presumably platforms operating in a similar manner) can provide valuable insight into trends in discourse, the types of colloquial information available to potential drug users, and how networks and interactivity might influence content exposure and expression. Research has described Twitter’s potential for serving as a platform for real-time content analysis [<xref ref-type="bibr" rid="ref10">10</xref>], and it has been extensively used to study a multitude of mental health phenomena, including resilience, internalizing disorders, and help-seeking behaviors [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. 
Twitter can provide valuable insight into trends (eg, via predictive analytics), colloquial information available to potential users (eg, via natural language processing [NLP]), and how networks and interactivity might influence the content to which people are exposed and that they express (eg, via social network analysis) [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>Although there have been numerous studies that explore one drug or drug class, such as prescription drug misuse, in the Twittersphere [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>], there have been fewer related to polydrug use [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>] or drug use more broadly [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Additional research is needed to leverage “infoveillance” strategies for drug-related content on social media. Digital epidemiology can be used to help us identify themes online [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Discussion of drug use and overdose via Twitter is common and may offer insights into how drugs are shared or discussed. Indeed, querying Twitter data for specific keywords associated with a drug’s prescription or street name yields large-scale data sets with such potential insights. However, these data sets require appropriate processing, analyses, and visualization to assess potential behavioral risk factors and communication patterns and to facilitate interpretation.</p>
        <p>In the specific area of overdose deaths, the foundational components needed to conduct rigorous social media research are not currently in place. The breadth and complexity of the ways in which drug use is discussed in formal and informal ways mean that contextually naïve approaches to social media analyses (eg, those that are not informed by topic-specific expertise in drug epidemiology) may struggle to produce meaningful and coherent output. This study lays out preliminary analyses and decision-making heuristics developed by a multidisciplinary team of researchers with expertise in big data analyses, social media, communication networks, and drug epidemiology. We leveraged Twitter’s application programming interface (API) to longitudinally scrape content specific to drug use, polydrug use, and overdose. An advantage of this approach is that Twitter’s API can provide access to a large number of discrete “documents” (ie, individual tweets) with limited character length, meaning that the number of possible “topics” appearing in any given tweet is limited by the nature of the platform.</p>
        <p>For this study, we were interested in studying the digital ecosystem for drug-related communication in this social media space. We suspected that the manifestations of drug-related content might also differ meaningfully depending on whether “brand” or “prescription” drug terms were used to identify tweets or whether “street names” were used to identify tweets. Brand or prescription terms may be used to discuss social events or things that are observed, whereas street names logically may be intended to conceal the topic of discussion or to signal group membership (though to the degree that they enter popular discourse, these purposes may shift). As formative research, this study was guided by the following 3 research questions:</p>
        <list list-type="order">
          <list-item>
            <p>What themes emerged from a corpus of tweets containing references to brand or prescription-named drugs (eg, OxyContin and oxycodone)?</p>
          </list-item>
          <list-item>
            <p>What themes emerged from a corpus of tweets containing references to street names of prescription medications (eg, oxys, oxi)?</p>
          </list-item>
          <list-item>
            <p>What differences could be observed between themes identified from brand versus street names of the same prescription drugs?</p>
          </list-item>
        </list>
        <p>Findings from this exploratory study stand to inform the relative landscape of digital communication relating to drug use and drug sharing online (ie, not tobacco or alcohol related). Insights from this study can be further leveraged to inform digital intervention and policy work and to provide further methodological considerations for conducting this type of digital epidemiology.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was reviewed by the Indiana University Institutional Review Board (protocol #18081) and received an exempt designation.</p>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>Data for this study were collected over a 3-month period from the public Twitter API using strategic queries and Boolean phrases (eg, OR, AND operators) to elicit data pertaining to drug-related communication on Twitter. These phrases were used to create a composite data set that was saved into a secure repository. Personally identifiable information was removed ahead of formal analysis. We identified appropriate queries by leveraging the National Institute on Drug Abuse website for commonly used drugs and their colloquial terms (or street names) [<xref ref-type="bibr" rid="ref22">22</xref>]. Using this list, we developed keywords as filters that were then applied to the collection of Twitter data [<xref ref-type="bibr" rid="ref15">15</xref>]. Keywords included a drug’s generic name (eg, oxycodone), brand name (eg, OxyContin, Xanax), and “street” names (eg, oxy, xannies).</p>
        <p>To create a comparative study, we created 2 distinct corpora: (1) a corpus of tweets containing references to brand names of prescription drugs (hereafter referred to as the Brand Name Corpus) and (2) a corpus of tweets containing references to street names of prescription drugs (hereafter referred to as the Street Name Corpus). After data cleaning and screening for duplicates and other irrelevant data, our final samples comprised 262,607 tweets in the Brand Name Corpus (N<sub>brand_name</sub>) and 204,068 tweets in the Street Name Corpus (N<sub>street_name</sub>), for a total sample size of 466,675 tweets.</p>
      </sec>
      <sec>
        <title>Analyses</title>
        <sec>
          <title>LDA Topic Models</title>
          <p>LDA is a commonly applied unsupervised NLP tool used to explore large-scale, unstructured corpora. LDA’s calculus for deriving a series of mathematically supported topics about a corpus has been colloquially described as a “bag of words” modeling approach [<xref ref-type="bibr" rid="ref23">23</xref>] because it is unconcerned with sentence structure or word order and only focuses on the total set of words (“terms”) that exists within each tweet. To perform this analysis, there are 3 main entities of interest: words, documents, and a corpus. A document contains a specific sequence of words, and a set of documents is considered to be part of a corpus. Before any analysis is done, words that do not contribute to the document’s meaning must be removed. Since LDA identifies patterns via co-occurrences of different words, we are especially interested in words that have at least one independent semantic meaning. Thus, words that are punctuation marks, stop words, and hyperlinks were removed from each document. After those words were removed, we ran the LDA topic model to find the underlying topic model structure for each corpus.</p>
          <p>More formally, LDA is defined as a generative probabilistic model of a corpus [<xref ref-type="bibr" rid="ref24">24</xref>]. In other words, documents are mixtures over a random distribution of topics, and each topic is represented via a distribution of words. Each document contains a set of words in which each word is distributed over a set of topics. According to Blei et al [<xref ref-type="bibr" rid="ref24">24</xref>], a topic can be defined as a distribution over some fixed vocabulary. To approximate latent topics, LDA uses Bayesian modeling with Gibbs sampling, a calculation that compares every word “x” with every other word “y” across a series of “d” documents in a corpus. Words and terms with high degrees of co-occurrence, that is, words and phrases that commonly appear together, are grouped to form a latent topic, which represents a core idea within the corpus. Although LDA identifies “topics” (sets of related terms) within the data set, the algorithm does not interpret what those topics mean. By viewing sets of terms used to form each topic and by reading example tweets that are strongly allocated to those topics, it is possible for researchers to label and assess the meaning of the topics generated by LDA. The utility of this approach is that it effectively leverages very large data sets: Although the allocation of any individual tweet may be either precise or imprecise, the overall generation of topics provides an accurate “10,000-mile” overview of the co-occurrence of terms being used within the total corpus of documents. <xref rid="figure1" ref-type="fig">Figure 1</xref> outlines a general LDA pipeline.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Conceptual diagram outlining a latent Dirichlet allocation (LDA) pipeline from preprocessing through qualitative review.</p>
            </caption>
            <graphic xlink:href="jmir_v25i1e48405_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>LDA topic models have been widely applied in the health and medical sciences in exploratory capacities to study the structure of related and unrelated documents. Although we acknowledge LDA’s age and function relative to newer, supervised NLP analyses (eg, BERT), we chose LDA due to its ease of access for non-NLP experts and its general application as an exploratory data analysis tool. For more information on topic modeling in health science, including proposed applications and functions, see studies by Valdez et al [<xref ref-type="bibr" rid="ref13">13</xref>] and Valdez and Goodson [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
        </sec>
        <sec>
          <title>Coherence Score Cross-Validation</title>
          <p>Coherence score cross-validation refers to an iterative analysis to identify the optimal number of topics for a given corpus. Coherence score cross-validation was completed by programming a computer to iteratively run an LDA topic model for an increasing <italic>k</italic> number of topics. For each analysis, the computer generates a coherence score, which refers to the degree to which a topic can be accurately interpreted by a human. It is recommended that any exploratory analyses that utilize LDA topic models should report coherence scores as a measure of model fit. For more information on coherence scores and cross-validation, see [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        </sec>
        <sec>
          <title>Informal Qualitative Review</title>
          <p>Although a computer can derive latent topics from a corpus, a computer cannot infer deeper meaning behind the content of the topics and what each topic represents. Thus, we performed an informal review, defined as an independent review and subsequent discussion, in which a randomly selected number of tweets per topic was reviewed by the research team to ascertain meaning. For more information on the qualitative review of NLP output, see Valdez et al [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        </sec>
      </sec>
      <sec>
        <title>Procedure</title>
        <p>Our workflow is depicted in <xref rid="figure2" ref-type="fig">Figure 2</xref>. First, we queried the Twitter API for top brand and street names commonly used by the US population. Tweets pertaining to brand name medications were triaged into the first corpus, and tweets pertaining to common street names were triaged into a second corpus. To pre-process the data, we ignored all articles and prepositions using the stopwords provided in the Natural Language Toolkit library. References to hyperlinks were ignored as well as white text and the @ symbol along with an individual’s Twitter handle and all retweets. Duplicate tweets by the same user were also ignored, as well as any words that contained references to numbers. After pre-processing the data, the cleaned documents were used in the topic model. Any further references to tweets in this paper will refer to the uncleaned tweets, to provide the full context of the tweet. Then, we performed an iterative LDA on the corpora to determine the optimal number of topics. Once optimal topic numbers were identified, we ran a final LDA comprising 20 topics for the Brand Name Corpus and 35 topics for the Street Name Corpus. Once we created topics, we used a “sort” function to classify all tweets in either corpus into one of the latent topics. The researchers on this team then convened to review a random selection of 25 to 50 tweets per topic to denote potential underlying meaning. Note that because the “sort” function relies on keywords, rather than supervised sorting, there is typically a high degree of overlap among topics. As such, team members reviewed tweets per topic until unanimous agreement was reached regarding meaning [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Conceptual diagram detailing our analysis pipeline. API: application programming interface; LDA: latent Dirichlet allocation.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e48405_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>Our findings illustrate the digital ecosystem pertaining to prescription medication communication on Twitter. Across the Brand Name Corpus and Street Name Corpus, we observed several overarching themes that may offer insight into how drug use communication occurs online. However, there were also several noteworthy differences between corpora that may reinforce the difficulty of digital surveillance regarding drug use. We present our findings in the following sections, which are parsed by the outlined research questions.</p>
      </sec>
      <sec>
        <title>Aim 1: What Themes Emerged From a Corpus of Tweets Containing References to Brand or Prescription-Named Drugs?</title>
        <p>For the Brand Name Corpus, our coherence score cross-validation indicated a 20-topic solution (<xref rid="figure3" ref-type="fig">Figure 3</xref>). <xref ref-type="table" rid="table1">Table 1</xref> outlines each topic, with representative keywords and the total number of documents (or tweets) that were sorted per topic. From <xref ref-type="table" rid="table1">Table 1</xref>, we note that topics 1, 8, 10, 12, and 20 were the 5 topics with the largest number of documents per topic. Broadly, these topics represented the most frequently co-occurring themes embedded within the brand name data set and are the most dominant topics in 49.44% (129,832/262,607) of all documents in the data set. We observed that tweets related to the border, opioid crisis, and political figures were found in nearly every topic, which may suggest drug communication. Thus, as it relates to brand names, topics may principally frame drug use as an ongoing social issue. Although some topics did allude to general drug use in lighthearted, humorous, or other social contexts (eg, topics 19 and 10), these topics contained a minority of all the documents in the data set. From <xref ref-type="table" rid="table2">Table 2</xref>, we observe that the drugs that were discussed the most were Adderall, prescription drugs (Ritalin, Xanax, Valium), benzodiazepines, and fentanyl. Individuals discussing Adderall either discussed the general effects of using the drug (eg, tweet: I never really take my full Adderall dosage but lately I have for work and it has been so helpful lmao I truly do get in my own way), humorously discussed Adderall usage (eg, tweet: I can take multiple adderalls without problem but let me take a single adderall with a latte and I start getting scared lmao), or expressed concern over the Adderall shortage that began in October 2022 (eg, tweet: RT @BostonGlobe: Amid the Adderall shortage, people with ADHD face withdrawal and despair) [<xref ref-type="bibr" rid="ref28">28</xref>]. 
The theme that had the largest number of topics was the theme with topics that referenced fentanyl use.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Coherence score plot for the iterative latent Dirichlet allocation (LDA) analyses across the (A) Brand Name Corpus and (B) Street Name Corpus.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e48405_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Top 10 words in each topic for the Brand Name Corpus (N=262,607).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="570"/>
            <col width="330"/>
            <thead>
              <tr valign="bottom">
                <td>Topic ID</td>
                <td>Keywords<sup>a</sup></td>
                <td>Documents per topic, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>adderall, get, shortage, eat, go, sh*t<sup>b</sup>, need, snort, let, trump</td>
                <td>32,215 (12.27)</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>mdma, test, show, video, therapy, see, mind, meet, alcohol, think</td>
                <td>7777 (2.96)</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>fentanyl, child, pill, report, find, fake, public, police, lace, candy</td>
                <td>7163 (2.73)</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>new, thank, high, year, happy, record, beat, release, share, build</td>
                <td>3895 (1.48)</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>sonata, watch, fit, car, piece, movement, winter, major, love, op</td>
                <td>13,011 (4.95)</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>kid, sound, keep, really, truth, get, think, school, d*mn, parent</td>
                <td>6111 (2.32)</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>come, fight, go, eye, bar, future, real, couple, destroy, plan</td>
                <td>4052 (1.54)</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>fentanyl, border, kill, people, death, country, stop, cartel, die, crisis</td>
                <td>32,536 (12.41)</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>get, ask, run, leave, say, guy, front, mom, find, go</td>
                <td>7847 (2.99)</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>valium, take, m, go, xanax, need, know, get, talk, want</td>
                <td>20,993 (7.99)</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>give, back, dose, turn, go, addiction, time, throw, face, catch</td>
                <td>6705 (2.56)</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>adderall, feel, get, ritalin, adhd, work, take, make, prescribe, hard</td>
                <td>24,148 (9.21)</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>adderall, pop, crazy, b*tch, feel, game, fly, finally, go, win</td>
                <td>4716 (1.80)</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>make, weed, use, shroom, drug, fun, post, mdma, trip, available</td>
                <td>7935 (3.02)</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>ambien, need, play, piano, listen, write, tweet, music, read, song</td>
                <td>9557 (3.63)</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>good, never, man, ever, get, hear, bad, vicodin, life, think</td>
                <td>11,981 (4.55)</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>drug, fentanyl, overdose, die, sell, cocaine, dealer, use, heroin, addict</td>
                <td>16,462 (6.27)</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>adderall, help, use, people, also, anxiety, lot, lol, effect, drug</td>
                <td>13,338 (5.09)</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>s, get, buy, prescription, doctor, oxycontin, week, month, pain, last</td>
                <td>12,311 (4.67)</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>take, adderall, day, sleep, time, hour, today, night, drink, morning</td>
                <td>19,846 (7.56)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Sorted by word weight; weights correspond to word order.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>*Asterisks were added during the paper write-up but did not appear in the actual keywords.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Brand name topics (n=20).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Topic theme</td>
                <td>Topic IDs</td>
                <td>Cumulative amount, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Adderall use or shortage</td>
                <td>1, 5, 6, 12, 20</td>
                <td>5 (25)</td>
              </tr>
              <tr valign="top">
                <td>Adderall shortage</td>
                <td>7, 13</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>Politics fentanyl</td>
                <td>3, 8, 9, 15, 16, 17</td>
                <td>6 (30)</td>
              </tr>
              <tr valign="top">
                <td>Prescription drug use (Xanax, Valium, and Ritalin were all mentioned)</td>
                <td>2, 4, 10</td>
                <td>3 (15)</td>
              </tr>
              <tr valign="top">
                <td>Stimulant use or lifestyle</td>
                <td>11, 14, 19</td>
                <td>3 (15)</td>
              </tr>
              <tr valign="top">
                <td>Assorted drug use</td>
                <td>18</td>
                <td>1 (5)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref> presents an intertopic distance map, which broadly determines the relative similarities and differences across topics based on their word distributions [<xref ref-type="bibr" rid="ref29">29</xref>]. A dynamic version of this figure is available online [<xref ref-type="bibr" rid="ref30">30</xref>]. The figure implies that topics in the Brand Name Corpus were typically, but not always, distinct. From this figure, we can observe how interrelated and distinct certain topics are. For example, we find that topics 1 and 20 are very similar to one another, since they are plotted on top of one another, which is confirmed in <xref ref-type="table" rid="table2">Table 2</xref>, where we find topics 1 and 20 both reference Adderall use. Generally, topics that are close to one another have similar themes since these topics will have similar word distributions. However, even though topics can have similar word distributions, they can display different themes based on the weighting of certain words in those distributions. For example, topics 9 and 11 have similar words used in both topics but have different themes.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Intertopic distance map (via multidimensional scaling) for brand name tweets. PC: principal component.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e48405_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Top 10 words in each topic for the Street Name Corpus (N=204,068).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="600"/>
            <col width="300"/>
            <thead>
              <tr valign="bottom">
                <td>ID</td>
                <td>Keywords<sup>a</sup></td>
                <td>Documents per topic, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>dog, cause, hear, get, light, mix, pain, go, benzo, problem</td>
                <td>3783 (1.85)</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>man, good, morning, back, big, sorry, suck, well, here, look</td>
                <td>3828 (1.88)</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>oxy, great, trade, stock, day, buy, today, ready, oil, trank</td>
                <td>5571 (2.73)</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>play, team, top_analyst, price_target, week, vike, next_week, player, good, season</td>
                <td>4756 (2.33)</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>start, fire, go, power, get, make, poor, skippy, money, office</td>
                <td>3660 (1.79)</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>skippy, d*mn<sup>b</sup>, go, get, right, s, sit, brother, keep, tell</td>
                <td>8876 (4.35)</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>downer, away, dexy, open, nice, upper, buy, room, release, go</td>
                <td>3662 (1.79)</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>lol, name, smoke, week, sick, action, member, rule, crack, carry</td>
                <td>2119 (1.04)</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>say, s, word, pull, guess, link, crystal, suppose, date, m</td>
                <td>2939 (1.44)</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>love, much, go, really, get, m, sleep, fr, hope, help</td>
                <td>5215 (2.56)</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>thank, hit, always, fan, wait, come, good, cute, still, funny</td>
                <td>3077 (1.51)</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>cool, get, one, worth, hook, foot, fail, report, read, opinion</td>
                <td>2240 (1.10)</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>take, watch, go, tonight, f*cking, welcome, benzo, get, usually, week</td>
                <td>2871 (1.41)</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>drug, get, pill, take, smart, people, make, give, benzo, doctor</td>
                <td>9527 (4.67)</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>know, hard, find, crystal_meth, vote, look, get, right, go, try</td>
                <td>4256 (2.09)</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>downers_grove, do, cook, north, live, gt, south, company, day, el_rushbo</td>
                <td>2375 (1.12)</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>use, new, year, dumb, happy, barb, seem, art, thing, also</td>
                <td>3771 (1.85)</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>feel, happy, m, pill, make, take, eat, day, good, list</td>
                <td>6121 (3.00)</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>leave, alone, get, stand, write, step, learn, rock, house, enough</td>
                <td>2511 (1.23)</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>people, re, know, want, skippy, say, s, think, get, wrong</td>
                <td>9732 (4.77)</td>
              </tr>
              <tr valign="top">
                <td>21</td>
                <td>percs, perc, get, pop, sh*t, f*ck, take, b*tch, go, fake</td>
                <td>20,521 (10.06)</td>
              </tr>
              <tr valign="top">
                <td>22</td>
                <td>true, lose, hydro, performance, finish, good, attention, replace, mate, lucky</td>
                <td>1604 (0.79)</td>
              </tr>
              <tr valign="top">
                <td>23</td>
                <td>barb, go, tweet, friend, follow, see, say, know, lie, happen</td>
                <td>10,415 (5.10)</td>
              </tr>
              <tr valign="top">
                <td>24</td>
                <td>vike, game, go, win, get, bill, skol, viking, let, lay</td>
                <td>19,027 (9.32)</td>
              </tr>
              <tr valign="top">
                <td>25</td>
                <td>bring, enjoy, lude, story, work, get, food, full, send, provide</td>
                <td>3387 (1.66)</td>
              </tr>
              <tr valign="top">
                <td>26</td>
                <td>next, miss, move, check, day, make, video, group, hour, gain</td>
                <td>2151 (1.05)</td>
              </tr>
              <tr valign="top">
                <td>27</td>
                <td>year, get, upper, last, old, long, home, fixer_upper, later, go</td>
                <td>5425 (2.66)</td>
              </tr>
              <tr valign="top">
                <td>28</td>
                <td>need, happy_pill, stop, book, take, get, smile, together, seriously, always</td>
                <td>4959 (2.43)</td>
              </tr>
              <tr valign="top">
                <td>29</td>
                <td>time, oxy, free, ahead, spend, levels_poste, family, market, never, break</td>
                <td>2177 (1.07)</td>
              </tr>
              <tr valign="top">
                <td>30</td>
                <td>sure, drug, country, future, fentanyl, war, speed, police, business, arrest</td>
                <td>2917 (1.42)</td>
              </tr>
              <tr valign="top">
                <td>31</td>
                <td>stay, vitamin, wish, school, ass, forget, beautiful, sexy_dexy, special, healthy</td>
                <td>2640 (1.29)</td>
              </tr>
              <tr valign="top">
                <td>32</td>
                <td>way, benzo, get, help, omg, s, sound, test, sell, addicted</td>
                <td>4032 (1.98)</td>
              </tr>
              <tr valign="top">
                <td>33</td>
                <td>yellow, red, blue, see, give, peanut_butter, card, green, goal, match</td>
                <td>4367 (2.14)</td>
              </tr>
              <tr valign="top">
                <td>34</td>
                <td>barb, get, nicki, say, talk, think, literally, hate, even, g</td>
                <td>10,790 (5.29)</td>
              </tr>
              <tr valign="top">
                <td>35</td>
                <td>upper, month, low, clean, barb, gun, get, high, lower, fit</td>
                <td>2905 (1.42)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Sorted by word weight; weights correspond to word order.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>*Asterisks were added during the paper write-up but did not appear in the actual keywords.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Aim 2: What Themes Emerged From a Corpus of Tweets Containing References to Street Names for Prescription Medications?</title>
        <p>For the Street Name Corpus, our coherence score cross-validation yielded a 35-topic solution (see <xref rid="figure5" ref-type="fig">Figure 5</xref>; note that we have added the .HTML files for <xref rid="figure3" ref-type="fig">Figures 3</xref> and <xref rid="figure5" ref-type="fig">5</xref> in [<xref ref-type="bibr" rid="ref31">31</xref>]). The top 10 keywords for each topic are shown in <xref ref-type="table" rid="table3">Table 3</xref>, as is the number of documents sorted into each topic. We observed that content comprising each topic was notably more diverse and often did not pertain exclusively to drug use communication. For example, words like “skippy” and “vike,” which are common street names for Adderall and Vicodin, often have large amounts of crossover with tweets using these terms in other popular contexts including peanut butter (Skippy) and the Minnesota Vikings (Vikes). We can also directly observe this crossover when comparing the keywords in topics 34 and 35, where “barb” referred both to Nicki Minaj’s fanbase (eg, tweet: @NICKIMINAJ BARBS STAND TF UP) and actual barbiturate use (eg, tweet: Gotta pop this barb and take off tonight) with near equal frequency. We further observed that, beyond popular culture references, other street names queried for the analysis yielded references to securities and stock exchanges, as seen in topics 3 and 34, where oxy both refers to Oxycontin, the medication, as well as the publicly traded company Occidental Petroleum Corporation, whose stock listing is Oxy. In contexts in which the street medication did refer to a particular drug or substance, we observed colloquial-style communication and references to recreational use. Topics 21, 32, 13, and 14 show this phenomenon, while topics 3, 24, 33, and 34 show how those same terms can also be used to collect tweets about non-drug-related topics. Overall, we found that 34.3% (69,995/204,068) of all topics were related to a diverse use or multiuse of a word. We found that 26% (53,058/204,068) of topics referenced Percocet use, and 20% (40,814/204,068) of topics referenced different opioids.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Intertopic distance map (via multidimensional scaling) for street name tweets. PC: principal component.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e48405_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Street name topics (n=35).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="370"/>
            <col width="430"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Topic theme</td>
                <td>Topic IDs</td>
                <td>Cumulative amount, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>No coherent topic</td>
                <td>5, 11, 30, 17</td>
                <td>4 (11)</td>
              </tr>
              <tr valign="top">
                <td>Percocet</td>
                <td>3, 9, 12, 13, 19, 21, 22, 26, 32</td>
                <td>9 (26)</td>
              </tr>
              <tr valign="top">
                <td>Diverse (all)</td>
                <td>4, 6, 7, 10, 18, 23, 24, 25, 28, 33, 34, 35</td>
                <td>12 (34)</td>
              </tr>
              <tr valign="top">
                <td>Diverse: vikes</td>
                <td>5, 11, 25</td>
                <td>3 (9)</td>
              </tr>
              <tr valign="top">
                <td>Diverse: oxy</td>
                <td>7, 25</td>
                <td>2 (6)</td>
              </tr>
              <tr valign="top">
                <td>Diverse: soccer</td>
                <td>33</td>
                <td>1 (3)</td>
              </tr>
              <tr valign="top">
                <td>Diverse: barbs</td>
                <td>18, 23, 34, 35</td>
                <td>4 (11)</td>
              </tr>
              <tr valign="top">
                <td>Diverse: band</td>
                <td>28</td>
                <td>1 (3)</td>
              </tr>
              <tr valign="top">
                <td>Diverse: skippy</td>
                <td>6</td>
                <td>1 (3)</td>
              </tr>
              <tr valign="top">
                <td>Opioids</td>
                <td>1, 2, 8, 14, 15, 29, 31</td>
                <td>7 (20)</td>
              </tr>
              <tr valign="top">
                <td>Uppers (amphetamine, stimulants)</td>
                <td>16, 20, 27</td>
                <td>3 (9)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Aim 3: What Differences Could Be Observed Between Themes Identified From Brand Versus Street Names of the Same Prescription Drugs?</title>
        <p>When comparing the topics generated from the Street Name Corpus and Brand Name Corpus, we found that the generated topics in both data sets included a lot of noise. However, the type and style of noise were not uniform. For example, in the brand name data set, we observed noise via the politicization of drug use and US society and occasionally misnomers, including references to Sonata as either a medication or vehicle. Although people mentioned recreational drug use in the Brand Name Corpus, references to single and polydrug use were more apparent in the Street Name Corpus. However, noise in the Street Name Corpus, which broadly alluded to diverse use of a single term or multiple terms, made identifying drug-related tweets notably more difficult. We also found less discussion about personal illicit drug use in the Brand Name Corpus, since names like “fentanyl” or “oxycontin” were often mentioned in relation to how the drugs are used or to political issues. In comparison, the street name data set was where personal illicit drug use was discussed more often and was less easily categorizable as a result. Tweets in these topics either contained some mention of drug use or the street name was also associated with non-drug-related entities, resulting in the search query picking up tweets that were not related to drug use.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study found that people discussed prescription drug use differently depending on whether a brand name or street name was used. Brand name categories often contained political talking points, while street name categories occasionally referenced drug misuse, though multiple social uses for a term muddled topic clarity. Content in the Brand Name Corpus reflected discussion about the drug itself and less often reflected personal use. However, content in the Street Name Corpus was notably more diverse and resisted simple LDA categorization.</p>
        <p>This study demonstrates distinct differences between tweets containing brand names and those containing street names of prescription drugs. It is plausible that these differences represent “silos” of discourse regarding drug use on Twitter: one in which Twitter users are creating and responding to content regarding the reciprocal impact of drug use on US politics and society or challenges in legally obtaining drugs in a shortage (brand names) and one in which Twitter users informally convene to discuss their experiences in using and obtaining prescription drugs recreationally.</p>
        <p>Our findings reinforce the difficult nature of digital surveillance for important and timely health topics. Particularly for street names, there is complexity in interpreting and even identifying the salient meaning in tweets from a large corpus of documents. For example, one might argue that information or words only attain meaning “in relation” to context [<xref ref-type="bibr" rid="ref32">32</xref>]. In other words, a tweet containing the word “vikes” is not universally interpreted as a text about Vicodin (eg, depending on the context and on people’s own lexicon, the word might mean many different things to different people). In contrast, the term “fentanyl” (from the Brand Name Corpus) has a clear meaning that is largely independent both of context and of people’s lexicon but can take on different political overtones and meanings depending on the surrounding context, intention, and even the recipients who are tagged in the tweet. Thus, in examining how data for the corpora were parsed differently, we speculate that we observed more clarity in drug use typology with brand names because any meaning that emerged discursively or from context primarily pertained to other words within the tweets aside from the drug term itself (eg, supportive or oppositional to a particular policy). In contrast, with street names, the drug use term itself often was ambiguous and achieved meaning in Twitter conversations based on the shared understanding of other users. A tool such as LDA is powerful because it can process large volumes of information with minimal input, but this study shows how drug use researchers must use care in using such unsupervised approaches to conduct digital epidemiology, especially when intending to learn about use behavior.</p>
        <p>Highlighting this complexity, visualization via distance mapping was ideal for the brand name tweets, but it did not represent the street name tweets well. This difference can be explained by the singular use of brand names (as a drug-related reference) relative to the multiple ways street names can be used and contextualized. For example, skippy, a well-known street name for Adderall, was often used in the context of the Skippy peanut butter brand. Likewise, the noise surrounding the street name tweets may create challenges for scientific data exploration but serves a practical purpose on Twitter of diverting attention or avoiding the attention of authorities. More research is needed to further contextualize how street names are used in a clandestine yet open manner online and on social media.</p>
        <p>Although researchers have used various techniques to identify drug-related messages online, the structure, content, and function of drug use and overdose information engagement networks on Twitter have not been well explored. This work quantifies the content and context of communication about prescription drugs on Twitter and increases the understanding of key themes in dissemination. A limitation of our analytic approach to identifying major drug use themes on Twitter is that it does not address individual tweet content specifically. To gain a full understanding of the identified themes, we recommend large-scale human coding of random samples of collected tweet data to support these findings. To better understand the role of structure in drug use tweet dissemination, next steps will explore follower networks in drug use and overdose communication on Twitter, applying social network analysis to determine the characteristics and positions of important players, similar to work that has been done around political communication [<xref ref-type="bibr" rid="ref33">33</xref>]. In an era of rapid information access and dissemination, the combination of quick and targeted interventions oriented to promote helpful drug use information or influence and reduce the impact of negative drug use information is key, especially for vulnerable populations such as youth. This work has the potential to be used for surveillance and detection of harmful information and for appropriate education and dissemination of information to persons engaged in drug use content on Twitter. Understanding the actors participating in these conversations may help us identify and engage influential players to reach people where they are (on Twitter) and disseminate relevant, timely, and effective health communication.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study was subject to limitations we hope to address in future work. First, although our analysis pipeline is supported in the literature, we relied on entirely unsupervised NLP analyses to analyze our data. Although the findings uncovered by the LDA, cross-validation, and initial data exploration tools are likely valid, more sophisticated and supervised machine learning analyses may have yielded further nuance. Future research should consider revisiting our data with such tools, including running topic models using S-BERT or GPT vectors. However, we caution that these analyses should only be undertaken in circumstances in which data are already highly cleaned and devoid of any noise, which our analysis sought to identify for future research. Second, we acknowledge that our informal review of topics did not constitute an in-depth qualitative evaluation of each topic. It is likely that performing more robust qualitative analyses would likewise yield more nuanced findings. Last, our study was likewise limited by abrupt changes to Twitter’s administrative team and particularly its purchase by current CEO Elon Musk; these changes limited our ability to collect more data given the slow truncation of Twitter’s API. The future of Twitter for academic research remains uncertain; therefore, we recommend a similar study be conducted on social networking websites other than Twitter, including Instagram and Threads, TikTok (barring future congressional bans), Mastodon, and/or Spill—examples of a growing number of Twitter alternatives with increasing popularity.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Drug use is widely discussed on social media. However, using a brand name or street name notably altered the content of a given social media post. Our findings largely confirm that drug communication fell into either politically charged discussions of drug use or the context of using drugs for medical or recreational purposes. Given the overwhelming nature of social media and social media as data, the wide presence of drug use disclosures online may promote drug use behaviors among vulnerable populations, including people with drug use disorders and adolescents.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None disclosed.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Cisewski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rossen</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Provisional Drug Overdose Death Counts</article-title>
          <source>National Center for Health Statistics</source>
          <access-date>2022-10-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/nchs/nvss/vsrr/drug-overdose-data.htm">https://www.cdc.gov/nchs/nvss/vsrr/drug-overdose-data.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ciccarone</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The rise of illicit fentanyls, stimulants and the fourth wave of the opioid overdose crisis</article-title>
          <source>Curr Opin Psychiatry</source>
          <year>2021</year>
          <month>07</month>
          <day>01</day>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>344</fpage>
          <lpage>350</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33965972"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/YCO.0000000000000717</pub-id>
          <pub-id pub-id-type="medline">33965972</pub-id>
          <pub-id pub-id-type="pii">00001504-900000000-99040</pub-id>
          <pub-id pub-id-type="pmcid">PMC8154745</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Kasper</surname>
              <given-names>ZA</given-names>
            </name>
            <name name-style="western">
              <surname>Cicero</surname>
              <given-names>TJ</given-names>
            </name>
          </person-group>
          <article-title>Twin epidemics: The surging rise of methamphetamine use in chronic opioid users</article-title>
          <source>Drug Alcohol Depend</source>
          <year>2018</year>
          <month>12</month>
          <day>01</day>
          <volume>193</volume>
          <fpage>14</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1016/j.drugalcdep.2018.08.029</pub-id>
          <pub-id pub-id-type="medline">30326396</pub-id>
          <pub-id pub-id-type="pii">S0376-8716(18)30687-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Auxier</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Social Media Use in 2021</article-title>
          <source>Pew Research Center</source>
          <year>2021</year>
          <month>04</month>
          <day>07</day>
          <access-date>2023-03-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2021/04/07/social-media-use-in-2021/">https://www.pewresearch.org/internet/2021/04/07/social-media-use-in-2021/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burgess</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The ‘digital town square’? What does it mean when billionaires own the online spaces where we gather?</article-title>
          <source>The Conversation</source>
          <year>2022</year>
          <month>04</month>
          <day>27</day>
          <access-date>2023-03-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://theconversation.com/the-digital-town-square-what-does-it-mean-when-billionaires-own-the-online-spaces-where-we-gather-182047">http://theconversation.com/the-digital-town-square-what-does-it-mean-when-billionaires-own-the-online-spaces-where-we-gather-182047</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>LS</given-names>
            </name>
          </person-group>
          <article-title>Substance abuse and America: historical perspective on the federal response to a social phenomenon</article-title>
          <source>J Natl Med Assoc</source>
          <year>1981</year>
          <month>06</month>
          <volume>73</volume>
          <issue>6</issue>
          <fpage>497</fpage>
          <lpage>506</lpage>
          <pub-id pub-id-type="medline">7017155</pub-id>
          <pub-id pub-id-type="pmcid">PMC2552718</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kalyanam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Katsuki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lanckriet</surname>
              <given-names>GRG</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
          </person-group>
          <article-title>Exploring trends of nonmedical use of prescription drugs and polydrug abuse in the Twittersphere using unsupervised machine learning</article-title>
          <source>Addict Behav</source>
          <year>2017</year>
          <month>02</month>
          <volume>65</volume>
          <fpage>289</fpage>
          <lpage>295</lpage>
          <pub-id pub-id-type="doi">10.1016/j.addbeh.2016.08.019</pub-id>
          <pub-id pub-id-type="medline">27568339</pub-id>
          <pub-id pub-id-type="pii">S0306-4603(16)30299-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shutler</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Portelli</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Blachford</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Drug use in the Twittersphere: a qualitative contextual analysis of tweets about prescription drugs</article-title>
          <source>J Addict Dis</source>
          <year>2015</year>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>303</fpage>
          <lpage>310</lpage>
          <pub-id pub-id-type="doi">10.1080/10550887.2015.1074505</pub-id>
          <pub-id pub-id-type="medline">26364675</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rutherford</surname>
              <given-names>BN</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>CCW</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stjepanović</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>GCK</given-names>
            </name>
          </person-group>
          <article-title>#TurntTrending: a systematic review of substance use portrayals on social media platforms</article-title>
          <source>Addiction</source>
          <year>2023</year>
          <month>02</month>
          <day>08</day>
          <volume>118</volume>
          <issue>2</issue>
          <fpage>206</fpage>
          <lpage>217</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36075258"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/add.16020</pub-id>
          <pub-id pub-id-type="medline">36075258</pub-id>
          <pub-id pub-id-type="pmcid">PMC10087142</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Vidal-Alaball</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Downing</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>López Seguí</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 and the 5G conspiracy theory: social network analysis of Twitter data</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>05</month>
          <day>06</day>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>e19458</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/5/e19458/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19458</pub-id>
          <pub-id pub-id-type="medline">32352383</pub-id>
          <pub-id pub-id-type="pii">v22i5e19458</pub-id>
          <pub-id pub-id-type="pmcid">PMC7205032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bathina</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Thij</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenzo-Luaces</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rutter</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Bollen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Individuals with depression express more distorted thinking on social media</article-title>
          <source>Nat Hum Behav</source>
          <year>2021</year>
          <month>04</month>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>458</fpage>
          <lpage>466</lpage>
          <pub-id pub-id-type="doi">10.1038/s41562-021-01050-7</pub-id>
          <pub-id pub-id-type="medline">33574604</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-021-01050-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bathina</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Thij</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rutter</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Bollen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Declining well-being during the COVID-19 pandemic reveals US social inequities</article-title>
          <source>PLoS One</source>
          <year>2021</year>
          <volume>16</volume>
          <issue>7</issue>
          <fpage>e0254114</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0254114"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0254114</pub-id>
          <pub-id pub-id-type="medline">34237087</pub-id>
          <pub-id pub-id-type="pii">PONE-D-21-07348</pub-id>
          <pub-id pub-id-type="pmcid">PMC8266050</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Thij</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bathina</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rutter</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Bollen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Social media insights into US mental health during the COVID-19 pandemic: longitudinal analysis of Twitter data</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>12</month>
          <day>14</day>
          <volume>22</volume>
          <issue>12</issue>
          <fpage>e21418</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/12/e21418/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21418</pub-id>
          <pub-id pub-id-type="medline">33284783</pub-id>
          <pub-id pub-id-type="pii">v22i12e21418</pub-id>
          <pub-id pub-id-type="pmcid">PMC7744146</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chary</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Genes</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Giraud-Carrier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hanson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Manini</surname>
              <given-names>AF</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology from Tweets: estimating misuse of prescription opioids in the USA from social media</article-title>
          <source>J Med Toxicol</source>
          <year>2017</year>
          <month>12</month>
          <volume>13</volume>
          <issue>4</issue>
          <fpage>278</fpage>
          <lpage>286</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28831738"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13181-017-0625-5</pub-id>
          <pub-id pub-id-type="medline">28831738</pub-id>
          <pub-id pub-id-type="pii">10.1007/s13181-017-0625-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5711756</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Katsuki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
            <name name-style="western">
              <surname>Cuomo</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Establishing a link between prescription drug abuse and illicit online pharmacies: analysis of Twitter data</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>12</month>
          <day>16</day>
          <volume>17</volume>
          <issue>12</issue>
          <fpage>e280</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/12/e280/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5144</pub-id>
          <pub-id pub-id-type="medline">26677966</pub-id>
          <pub-id pub-id-type="pii">v17i12e280</pub-id>
          <pub-id pub-id-type="pmcid">PMC4704982</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Grucza</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Bierut</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>"Get drunk. Smoke weed. Have fun.": a content analysis of tweets about marijuana and alcohol</article-title>
          <source>Am J Health Promot</source>
          <year>2017</year>
          <month>05</month>
          <volume>31</volume>
          <issue>3</issue>
          <fpage>200</fpage>
          <lpage>208</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26559715"/>
          </comment>
          <pub-id pub-id-type="doi">10.4278/ajhp.150205-QUAL-708</pub-id>
          <pub-id pub-id-type="medline">26559715</pub-id>
          <pub-id pub-id-type="pmcid">PMC4956592</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cabrera-Nguyen</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bierut</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Moreno</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Young adults' exposure to alcohol- and marijuana-related content on Twitter</article-title>
          <source>J Stud Alcohol Drugs</source>
          <year>2016</year>
          <month>03</month>
          <volume>77</volume>
          <issue>2</issue>
          <fpage>349</fpage>
          <lpage>353</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26997194"/>
          </comment>
          <pub-id pub-id-type="doi">10.15288/jsad.2016.77.349</pub-id>
          <pub-id pub-id-type="medline">26997194</pub-id>
          <pub-id pub-id-type="pmcid">PMC4803667</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Brawner</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Kranzler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Giorgi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Abera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Exploring substance use tweets of youth in the United States: mixed methods study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>03</month>
          <day>26</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e16191</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/1/e16191/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16191</pub-id>
          <pub-id pub-id-type="medline">32213472</pub-id>
          <pub-id pub-id-type="pii">v6i1e16191</pub-id>
          <pub-id pub-id-type="pmcid">PMC7146240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kath</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>QC</given-names>
            </name>
          </person-group>
          <article-title>National substance use patterns on Twitter</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <volume>12</volume>
          <issue>11</issue>
          <fpage>e0187691</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0187691"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0187691</pub-id>
          <pub-id pub-id-type="medline">29107961</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-20338</pub-id>
          <pub-id pub-id-type="pmcid">PMC5673183</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salathé</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bengtsson</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bodnar</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brewer</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Buckee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Cattuto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Khandelwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mabry</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Vespignani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Digital epidemiology</article-title>
          <source>PLoS Comput Biol</source>
          <year>2012</year>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>e1002616</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pcbi.1002616"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002616</pub-id>
          <pub-id pub-id-type="medline">22844241</pub-id>
          <pub-id pub-id-type="pii">PCOMPBIOL-D-12-00494</pub-id>
          <pub-id pub-id-type="pmcid">PMC3406005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: framework for an emerging set of public health informatics methods to analyze search, communication and publication behavior on the Internet</article-title>
          <source>J Med Internet Res</source>
          <year>2009</year>
          <month>03</month>
          <day>27</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2009/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1157</pub-id>
          <pub-id pub-id-type="medline">19329408</pub-id>
          <pub-id pub-id-type="pii">v11i1e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC2762766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <article-title>Commonly Used Drugs Charts</article-title>
          <source>National Institute on Drug Abuse</source>
          <access-date>2023-04-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://nida.nih.gov/research-topics/commonly-used-drugs-charts">https://nida.nih.gov/research-topics/commonly-used-drugs-charts</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tufts</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <source>The Little Book of LDA</source>
          <access-date>2023-03-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ldabook.com/">https://ldabook.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Latent Dirichlet allocation</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Goodson</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Language bias in health research: external factors that influence latent language patterns</article-title>
          <source>Front Res Metr Anal</source>
          <year>2020</year>
          <volume>5</volume>
          <fpage>4</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33870042"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/frma.2020.00004</pub-id>
          <pub-id pub-id-type="medline">33870042</pub-id>
          <pub-id pub-id-type="pmcid">PMC8028389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Andrzejewski</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Buttler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Exploring topic coherence over many models and many topics</article-title>
          <source>Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning</source>
          <year>2012</year>
          <fpage>952</fpage>
          <lpage>961</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D12-1087/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jozkowski</surname>
              <given-names>KN</given-names>
            </name>
            <name name-style="western">
              <surname>Montenegro</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Crawford</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Identifying accurate pro-choice and pro-life identity labels in Spanish: Social media insights and implications for comparative survey research</article-title>
          <source>Perspect Sex Reprod Health</source>
          <year>2022</year>
          <month>12</month>
          <volume>54</volume>
          <issue>4</issue>
          <fpage>166</fpage>
          <lpage>176</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36254620"/>
          </comment>
          <pub-id pub-id-type="doi">10.1363/psrh.12208</pub-id>
          <pub-id pub-id-type="medline">36254620</pub-id>
          <pub-id pub-id-type="pmcid">PMC10092859</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>FDA Announces Shortage of Adderall</article-title>
          <source>Food &amp; Drug Administration</source>
          <year>2022</year>
          <month>10</month>
          <day>22</day>
          <access-date>2023-04-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/drugs/drug-safety-and-availability/fda-announces-shortage-adderall">https://www.fda.gov/drugs/drug-safety-and-availability/fda-announces-shortage-adderall</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sievert</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shirley</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>LDAvis: A method for visualizing and interpreting topics</article-title>
          <source>Proceedings of the Workshop on Interactive Language Learning, Visualization, and Interfaces</source>
          <year>2014</year>
          <fpage>63</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/W14-3110"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/W14-3110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Agley</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Digital Epidemiology of Prescription Drug References among Twitter Users: Results and Methodological Implications from Latent Dirichlet Allocation (LDA) Analyses</article-title>
          <source>OSF</source>
          <year>2023</year>
          <month>05</month>
          <day>30</day>
          <access-date>2023-06-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://osf.io/sruft/?view_only=19bc11648de744a6814ac756d15c834b">https://osf.io/sruft/?view_only=19bc11648de744a6814ac756d15c834b</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>Brand name visualization</article-title>
          <source>Google Drive</source>
          <access-date>2023-06-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://drive.google.com/file/d/1SM_N-DtSiUqwu1YS24W2LQLbTKfZwnam/view">https://drive.google.com/file/d/1SM_N-DtSiUqwu1YS24W2LQLbTKfZwnam/view</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burnett</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Returning to text: affect, meaning making, and literacies</article-title>
          <source>Read Res Q</source>
          <year>2020</year>
          <month>02</month>
          <day>27</day>
          <volume>56</volume>
          <issue>2</issue>
          <fpage>355</fpage>
          <lpage>367</lpage>
          <pub-id pub-id-type="doi">10.1002/rrq.303</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bestvater</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rivero</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Politics on Twitter: One-Third of Tweets From U.S. Adults Are Political</article-title>
          <source>Pew Research Center</source>
          <year>2022</year>
          <month>06</month>
          <day>16</day>
          <access-date>2023-03-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/politics/2022/06/16/politics-on-twitter-one-third-of-tweets-from-u-s-adults-are-political/">https://www.pewresearch.org/politics/2022/06/16/politics-on-twitter-one-third-of-tweets-from-u-s-adults-are-political/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
