<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e42985</article-id>
      <article-id pub-id-type="pmid">36790847</article-id>
      <article-id pub-id-type="doi">10.2196/42985</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Examining Rural and Urban Sentiment Difference in COVID-19–Related Topics on Twitter: Word Embedding–Based Retrospective Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Goodman</surname>
            <given-names>Sara</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wahbeh</surname>
            <given-names>Abdullah</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Yongtai</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>Vanderbilt University</institution>
            <addr-line>2525 West End Ave. Suite 8058</addr-line>
            <addr-line>Nashville, TN, 37203</addr-line>
            <country>United States</country>
            <phone>1 615 343 9096</phone>
            <email>yongtai.liu@vanderbilt.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0279-3644</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Yin</surname>
            <given-names>Zhijun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3075-1337</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ni</surname>
            <given-names>Congning</given-names>
          </name>
          <degrees>ME</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6950-6948</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Yan</surname>
            <given-names>Chao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6719-1388</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wan</surname>
            <given-names>Zhiyu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3752-5778</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Malin</surname>
            <given-names>Bradley</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3040-5175</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>Vanderbilt University</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>Vanderbilt University Medical Center</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Biostatistics</institution>
        <institution>Vanderbilt University Medical Center</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Yongtai Liu <email>yongtai.liu@vanderbilt.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>2</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e42985</elocation-id>
      <history>
        <date date-type="received">
          <day>26</day>
          <month>9</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>25</day>
          <month>10</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>12</day>
          <month>1</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>27</day>
          <month>1</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Yongtai Liu, Zhijun Yin, Congning Ni, Chao Yan, Zhiyu Wan, Bradley Malin. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 15.02.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e42985" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>By the end of 2022, more than 100 million people were infected with COVID-19 in the United States, and the cumulative death rate in rural areas (383.5/100,000) was much higher than in urban areas (280.1/100,000). As the pandemic spread, people used social media platforms to express their opinions and concerns about COVID-19–related topics.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to (1) identify the primary COVID-19–related topics in the contiguous United States communicated over Twitter and (2) compare the sentiments urban and rural users expressed about these topics.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We collected tweets containing geolocation data from May 2020 to January 2022 in the contiguous United States. We relied on the tweets’ geolocations to determine if their authors were in an urban or rural setting. We trained multiple <italic>word2vec</italic> models with several corpora of tweets based on geospatial and timing information. Using a <italic>word2vec</italic> model built on all tweets, we identified hashtags relevant to COVID-19 and performed hashtag clustering to obtain related topics. We then ran an inference analysis for urban and rural sentiments with respect to the topics based on the similarity between topic hashtags and opinion adjectives in the corresponding urban and rural <italic>word2vec</italic> models. Finally, we analyzed the temporal trend in sentiments using monthly <italic>word2vec</italic> models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We created a corpus of 407 million tweets, 350 million (86%) of which were posted by users in urban areas, while 18 million (4.4%) were posted by users in rural areas. There were 2666 hashtags related to COVID-19, which clustered into 20 topics. Rural users expressed stronger negative sentiments than urban users about COVID-19 prevention strategies and vaccination (<italic>P</italic>&#60;.001). Moreover, there was a clear political divide in the perception of politicians by urban and rural users; these users communicated stronger negative sentiments about Republican and Democratic politicians, respectively (<italic>P</italic>&#60;.001). Regarding misinformation and conspiracy theories, urban users exhibited stronger negative sentiments about the “covidiots” and “China virus” topics, while rural users exhibited stronger negative sentiments about the “Dr. Fauci” and “plandemic” topics. Finally, we observed that urban users’ sentiments about the economy appeared to transition from negative to positive in late 2021, which was in line with the US economic recovery.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study demonstrates there is a statistically significant difference in the sentiments of urban and rural Twitter users regarding a wide range of COVID-19–related topics. This suggests that social media can be relied upon to monitor public sentiment during pandemics in disparate types of regions. This may assist in the geographically targeted deployment of epidemic prevention and management efforts.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>social media</kwd>
        <kwd>word embedding</kwd>
        <kwd>topic analysis</kwd>
        <kwd>sentiment analysis</kwd>
        <kwd>Twitter</kwd>
        <kwd>data</kwd>
        <kwd>vaccination</kwd>
        <kwd>prevention</kwd>
        <kwd>urban</kwd>
        <kwd>rural</kwd>
        <kwd>epidemic</kwd>
        <kwd>management</kwd>
        <kwd>model</kwd>
        <kwd>training</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The COVID-19 pandemic has persisted for over two years. By the end of 2022, more than 100 million people in the United States were infected with COVID-19, with notable disparities [<xref ref-type="bibr" rid="ref1">1</xref>]. In particular, the cumulative death rate in rural areas (383.5/100,000) has been significantly higher than in urban areas (280.1/100,000) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>], a disparity that highlights the need to improve practices in prevention and control [<xref ref-type="bibr" rid="ref3">3</xref>]. However, the path to improving the situation in rural environments is not evident, partially due to the fact that urban and rural residents have different attitudes about COVID-19 and policies regarding its management. For example, it has been shown that rural residents are less concerned about the coronavirus [<xref ref-type="bibr" rid="ref4">4</xref>] and are less willing to engage in COVID-19–related prevention behaviors [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Moreover, political polarization influences the public’s attitude and reaction to the COVID-19 pandemic [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>To date, there have been several studies into the differences between urban and rural sentiment about COVID-19 [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. However, these studies have mainly relied upon formal surveys, which are limited in their ability to shed light on the matter because they are time-consuming, and the findings (as well as the policies based on them) can become stale in the face of the rapid evolution of the situation [<xref ref-type="bibr" rid="ref11">11</xref>]. Social media platforms have enabled people to report on their experiences and express their perspectives on COVID-19 on a wide scale. The data generated through social media have been relied upon to study various aspects of health and wellness [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref15">15</xref>], such that it is natural to hypothesize that this large and diverse collection of user-generated data provides opportunities to investigate the differences between urban and rural sentiments. In this paper, we report public sentiment on COVID-19–related topics using data from Twitter, one of the largest social platforms in the United States, with over 200 million daily active users [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      <p>While topic extraction and sentiment analysis are typical natural language processing tasks, prior research on inferring sentiments about COVID-19 from social media has been limited in several ways. First, prior studies [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] have relied on topic modeling techniques, such as latent Dirichlet allocation [<xref ref-type="bibr" rid="ref19">19</xref>], to identify relevant topics from the collected social media data. However, such methods rely on document-level word co-occurrences to infer topic distribution [<xref ref-type="bibr" rid="ref20">20</xref>], which leads to poor topic extraction performance for noisy short-text data [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Second, most studies applied either predefined rules [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>], such as VADER (Valence Aware Dictionary and Sentiment Reasoner) [<xref ref-type="bibr" rid="ref26">26</xref>], or machine learning models to infer sentiment from tweets. While rule-based approaches fail to leverage the contextual information specific to each corpus, machine learning approaches [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref32">32</xref>] are hindered by their need for a nontrivial amount of label annotation and training [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref33">33</xref>].</p>
      <p>Even if the labeling process can be expedited, to initiate a study with social media data, it is necessary to collect online posts on the topic of interest. The majority of earlier studies in this area applied keyword filtering to collect COVID-19–related tweets [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. However, keyword filtering is hindered by an incompleteness problem that can lead to biased investigations. For example, in one vaccination opinion study [<xref ref-type="bibr" rid="ref38">38</xref>], tweets were collected using the keyword list “vaccine, vaccination, vaccinate, vaccinating, vaccinated,” which neglected all tweets that used the word “vax.” At the same time, the societal response to the pandemic is constantly evolving, with new keywords being generated at different stages. It is unlikely that one would be aware of all appropriate keywords at any point in time. For instance, in the COVID-19-TweetIDs data set [<xref ref-type="bibr" rid="ref39">39</xref>], the word “vaccine” was not added to the keyword list until November 2021—one year after vaccines received US Food and Drug Administration emergency use authorization.</p>
      <p>In this paper, we investigate differences in the sentiments of urban and rural residents regarding COVID-19 and related topics. To accomplish this task, we introduce a novel approach for COVID-19 sentiment analysis. This approach begins by collecting tweets without any predefined keywords. To identify topics from the brief amount of text in a tweet, the approach leverages word-embedding models and a clustering approach to extract topics related to COVID-19. Our new sentiment analysis approach combines lexicons and semantic information to quantify public sentiment with respect to a specific population of interest regarding COVID-19 and related topics, such as prevention, vaccination, and politics.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> depicts the data processing and research pipeline for this study. It consisted of three primary steps: (1) tweet collection, (2) model training, and (3) sentiment analysis. The collection step involved the gathering of tweets and a designation of their urban or rural status. The model training step involved training multiple <italic>word2vec</italic> models based on geospatial and timing information. Finally, the sentiment analysis step consisted of COVID-19 topic clustering and multidimensional sentiment analysis with opinion adjectives.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>An illustration of the research pipeline. w2v: word2vec.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e42985_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data</title>
        <p>We used the <italic>Tweepy</italic> Python library (version 3.8) to collect 407 million geotagged tweets posted in the contiguous United States through the Twitter application programming interface streaming function between May 2020 and January 2022. A geotagged tweet contains location information as either (1) a specific latitude and longitude or (2) a Twitter place text field. For tweets with the place field, we applied geocoding with the <italic>geopy</italic> Python package (version 2.2) to obtain the latitude and longitude, which were then translated into 5-digit zip codes. We did not apply keyword filtering during collection, so the collected tweets are expected to be an unbiased sample of all publicly accessible US geotagged tweets.</p>
        <sec>
          <title>Urban and Rural Tweet Classification</title>
          <p>We mapped each zip code into its respective area type according to its rural-urban commuting area (RUCA) coding [<xref ref-type="bibr" rid="ref40">40</xref>]. RUCA codes classify US zip codes and census tracts into 10 levels based on commuting information. For example, level 1 stands for a major metropolitan area, while level 10 represents an isolated rural area. These levels can be further grouped into 4 tiers [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]: urban core (level 1), suburban (levels 2-3), large rural (levels 4-6), and small town/rural (levels 7-10). In this study, we focused on urban core and small town/rural, as we anticipated more notable differences would be found at these levels.</p>
        </sec>
        <sec>
          <title>Preprocessing</title>
          <p>We removed non-English tweets using the tweet’s <italic>lang</italic> attribute and the <italic>langdetect</italic> language detection package (version 1.0.9). For each remaining tweet, we removed URLs, handles, and the leading “RT” (which stands for “retweet”). We dropped all punctuation and converted all text into lowercase. We then removed tweets with fewer than 3 words from the data corpus.</p>
        </sec>
      </sec>
      <sec>
        <title>Methodology</title>
        <sec>
          <title>Word Embedding</title>
          <p>We trained word-embedding models using the skip-gram negative sampling approach implemented in the <italic>gensim</italic> Python package (version 4.1.2). We set the vector dimension size to 200 and applied a window size of 5. To characterize sentiment changes among urban and rural users on a monthly basis, we trained <italic>word2vec</italic> models using the monthly corpus with 10 epochs. For efficiency, we trained <italic>word2vec</italic> models using tweets across months for 5 epochs. Parameter tuning was accomplished through word analogy tests (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> presents the details). We obtained the word-embedding model <italic>all-tweets-w2v</italic> from all of the tweets. Two separate models, <italic>urban-w2v</italic> and <italic>rural-w2v</italic>, were generated using all of the urban core and small town/rural tweets, respectively.</p>
        </sec>
        <sec>
          <title>Topic Extraction With Hashtag Clustering</title>
          <p>Twitter users often apply hashtags to label their tweets by topic or theme [<xref ref-type="bibr" rid="ref43">43</xref>]. Thus, we relied on hashtags to describe and infer topics about COVID-19. We used the word-embedding model <italic>all-tweets-w2v</italic> to find and cluster hashtags related to COVID-19.</p>
          <p>The relevance of a hashtag to COVID-19 was measured through a similarity comparison between the given hashtag vector and the vectors for the 3 most common hashtags in the collected data: #covid19, #covid, and #coronavirus. We defined the relevance score as the maximum of the 3 cosine similarity values. We selected all hashtags with a relevance score over a certain relevance threshold and a frequency greater than 50 from the <italic>all-tweets-w2v</italic> model. These hashtags were then subject to an automated clustering process. It should be recognized that the relevance threshold is crucial to our analysis. A larger threshold will lead to a small set of hashtags, resulting in an undersampling of all related hashtags, whereas a smaller threshold will include non–COVID-19 related hashtags. To determine an appropriate relevance threshold, we instructed 5 human annotators to review hashtags with similarity scores above a threshold and the corresponding clustering quality to judge whether hashtags under the current threshold were related to COVID-19. We reviewed hashtag candidates for various thresholds, ultimately settling on a value of 0.5. Further details about the human evaluation are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <p>We applied uniform manifold approximation and projection for dimension reduction (UMAP) [<xref ref-type="bibr" rid="ref44">44</xref>] on the vector representation of the COVID-19–related hashtags to perform dimensionality reduction and mitigate the impact of a high-dimensional system [<xref ref-type="bibr" rid="ref45">45</xref>]. Clustering was accomplished via hierarchical density-based spatial clustering of applications with noise (HDBSCAN) [<xref ref-type="bibr" rid="ref46">46</xref>]. We performed a grid search on UMAP and HDBSCAN to find the clustering model with the highest relative validity score, a fast approximation of the density-based cluster validity [<xref ref-type="bibr" rid="ref47">47</xref>], to evaluate density-based and arbitrarily shaped clusters. The resulting clusters represented topics related to COVID-19. We defined the topic vector as the weighted average of hashtag vectors in the cluster, where the weight is proportional to the count of the hashtag in the corpus. This definition referenced the general usage of word embedding in document representation [<xref ref-type="bibr" rid="ref48">48</xref>]. All experiments were performed with the <italic>UMAP</italic> (version 0.5.2), <italic>hdbscan</italic> (version 0.8.28), and <italic>sklearn</italic> (version 1.0.2) Python packages.</p>
        </sec>
        <sec>
          <title>Sentiment Analysis With Opinion Adjectives</title>
          <p>Opinion adjectives have been adopted to analyze stereotypes through the geometry of word-embedding vectors [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>]. For example, the vector for the adjective “lucky” is close to the vector for “clover” [<xref ref-type="bibr" rid="ref49">49</xref>]. For this work, we relied on the annotated adjectives in SentiWordNet 3.0 [<xref ref-type="bibr" rid="ref51">51</xref>] to quantify people’s sentiments about COVID-19 topics. In SentiWordNet, each word (“w”) has a positive and negative sentiment score: pos(w) and neg(w), respectively. For example, the word “fine” has pos(fine) and neg(fine) scores of 0.625 and 0.125, respectively. We selected adjectives such that each adjective (“a”) was associated with a pos(a) + neg(a) ≥ 0.5 based on the sentiment score distribution of all adjectives in SentiWordNet (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provides details).</p>
          <p>We assumed that adjectives that are more often used to describe a hashtag would have a higher similarity score with respect to the hashtag than those that are infrequently used. In this regard, the difference in the use of adjectives between urban and rural users can be measured via the difference in the hashtag-adjective similarity scores between urban and rural word-embedding models. For instance, the adjectives used mainly by urban users to describe a COVID-19 topic can be learned from comparing the topic vector to the adjectives in the <italic>urban-w2v</italic> model. Similarly, the preference of adjectives for rural users can be obtained from the <italic>rural-w2v</italic> model. We retained adjectives that appeared in both <italic>urban-w2v</italic> and <italic>rural-w2v</italic> for sentiment calculation.</p>
          <p>We combined the topic-adjective similarity score with the sentiment score for adjectives to learn the sentiment for a topic of interest. Formally, given an adjective collection <italic>A</italic>, the sentiment score of an adjective <italic>a</italic> in <italic>A</italic>, represented as <italic>sent(a)</italic>, is defined as pos(a) − neg(a). The raw sentiment score about a target <italic>t</italic> in the <italic>word2vec</italic> model is defined as follows:</p>
          <p>
            <graphic xlink:href="jmir_v25i1e42985_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </p>
          <p>where sim(a,t) refers to the cosine similarity between the vector for adjective <italic>a</italic> and the vector for target <italic>t</italic>.</p>
          <p>To enable a comparison between 2 sentiment systems, we normalized the raw sentiment score of topics in each model according to their <italic>z</italic> score, as follows:</p>
          <p>
            <graphic xlink:href="jmir_v25i1e42985_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </p>
          <p>where <italic>S</italic> defines a baseline hashtag set that contains 1000 randomly sampled hashtags. We normalized the urban and rural sentiment scores using 2 different baseline sets in which hashtags were randomly selected from their respective vocabularies. The raw sentiment scores for the baseline hashtags were relied upon to estimate the mean <italic>avg(S)</italic> and standard deviation <italic>std(S)</italic>. The resulting normalized sentiment score reflects the magnitude of positive or negative sentiments, which we applied to compare the differences in urban and rural sentiment.</p>
          <p>We used a topic vector to represent all of the hashtags in a topic. This approach calculates the sentiment about a topic; however, it cannot estimate the variance across sentiments (ie, the sentiment difference for various hashtags). Thus, for each topic, we sampled 25% of the hashtags without replacement according to their weights (ie, proportional to their counts). We then averaged the vectors for these hashtags to obtain a sampled topic vector. The sentiment score for the sampled topic vector was calculated as described earlier. This process was repeated 10 times to obtain a set of scores, which were used to compute the average sentiments and their variance.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data</title>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> depicts the number of tweets collected with respect to their region in the United States, where blue represents urban core areas, and red represents small town/rural areas. A darker color means a higher number of tweets in that area. As can be seen in the figure, the distribution generally matches the urban-rural classification scheme in the United States [<xref ref-type="bibr" rid="ref52">52</xref>]. <xref ref-type="table" rid="table1">Table 1</xref> provides summary statistics for the 3 word-embedding models trained using collected tweets.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Number of tweets collected in US urban core and small town/rural zip codes.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e42985_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Training data for the word-embedding models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Rural-urban commuting area tier</td>
                <td>All tweets</td>
                <td>Urban core</td>
                <td>Small town/rural</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Tweets, n</td>
                <td>407 million</td>
                <td>350 million</td>
                <td>18 million</td>
              </tr>
              <tr valign="top">
                <td>Words per tweet, n</td>
                <td>10.47</td>
                <td>10.44</td>
                <td>10.54</td>
              </tr>
              <tr valign="top">
                <td>Unique hashtags, n</td>
                <td>474,124</td>
                <td>333,177</td>
                <td>30,080</td>
              </tr>
              <tr valign="top">
                <td>Hashtags per tweet, n</td>
                <td>0.18</td>
                <td>0.18</td>
                <td>0.17</td>
              </tr>
              <tr valign="top">
                <td><italic>Word2vec</italic> model</td>
                <td>
                  <italic>all-tweets-w2v</italic>
                </td>
                <td>
                  <italic>urban-w2v</italic>
                </td>
                <td>
                  <italic>rural-w2v</italic>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Topic Clustering</title>
        <p>We collected 2666 COVID-19–related hashtags. These hashtags clustered into 30 distinct topics. After a manual review of the clusters, we determined that 20 topics were closely related to COVID-19 in the United States. The other 10 corresponded to less relevant topics, such as general social justice issues (eg, the George Floyd events) and news about the Middle East or COVID-19 in other countries (eg, Canada, India, and Mexico). <xref rid="figure3" ref-type="fig">Figure 3</xref> presents a 2D representation of the word-embedding vectors for the clustered hashtags in the 20 COVID-19–related topics. Based on the closeness of the topic hashtags, we further grouped the topics into 4 categories: misinformation; prevention and treatment; economy; and news and politics. For example, topics belonging to the misinformation category, including “covidiots,” “China virus,” and “plandemic,” appear in the upper left corner. Topics about news and politics are grouped in the upper right corner. Topics in the prevention and treatment category and the economy category also exhibit a similar grouping pattern. Specific topics, namely “COVID-19,” “health,” and “school,” do not fall into any of the 4 categories.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>A 2D representation of uniform manifold approximation and projection clustering results for 20 topics. Each point represents a distinct hashtag.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e42985_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the number of hashtags, the 10 most tweeted hashtags, and a manually assigned label for each of the 20 topics. It can be seen that the topics “mandates,” “health,” and “vaccine” are affiliated with the most user-generated hashtags, which highlights the users’ concerns about COVID-19 prevention and its impact on health.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The 20 COVID-19 topics inferred from the tweets collected for this study. The topics are presented in descending order according to the number of unique hashtags they hold. The hashtags are presented in descending order according to their frequency.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="270"/>
            <col width="590"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Category/topic label</td>
                <td>Ten most frequent hashtags</td>
                <td>Unique hashtags, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">COVID-19</td>
                <td>covid19, coronavirus, covid, covid_19, pandemic, covid-19, corona, covid__19, omicron, covid-19</td>
                <td>79</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>(1) Misinformation</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Open America</td>
                <td>openamericanow, nomasks, vaccinemandate, maskmandate, nomask, donotcomply, reopenamerica, vaccinepassport, vaccinepassports, maskmandates</td>
                <td>73</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Covidiots</td>
                <td>covidiots, antivaxxers, idiots, moron, covididiots, stupidity, morons, antimaskers, antivaxxer, antivax</td>
                <td>30</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>China Virus</td>
                <td>chinavirus, billgates, ccpvirus, wuhanvirus, wuhan, chinaliedpeopledied, chinesevirus, chinaliedandpeopledied, agenda21, wuhancoronavirus</td>
                <td>30</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Dr. Fauci</td>
                <td>fauci, drfauci, firefauci, faucithefraud, anthonyfauci, fauciliedpeopledied, fauciemails, faucilied, faucifraud, birx</td>
                <td>22</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Plandemic</td>
                <td>plandemic, hoax, scamdemic, factsnotfear, covidhoax, fearmongering, kungflu, scamdemic2020, fearporn, coronahoax</td>
                <td>20</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>HCQ</td>
                <td>hydroxychloroquine, ivermectin, cnntownhall, remdesivir, hcq, regeneron, hydroxycloroquine, trumpvaccine, hydroxycholoroquine, dexamethasone</td>
                <td>16</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>(2) Prevention and treatment</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mandates</td>
                <td>wearamask, 2020, staysafe, maskup, stayhome, socialdistancing, quarantine, quarantinelife, mask, lockdown</td>
                <td>397</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Vaccine</td>
                <td>covidvaccine, vaccine, science, getvaccinated, vaccinated, pfizer, moderna, getvaccinatednow, vaccineswork, covid19vaccine</td>
                <td>198</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Essential Worker</td>
                <td>essentialworkers, nurses, healthcareheroes, inthistogether, healthcareworkers, frontlineworkers, frontlineheroes, healthcareworker, frontliners, frontlines</td>
                <td>27</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>(3) Economy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td>Stimulus Check</td>
                <td>stimuluscheck, stimulus, unemployment, heroesact, americanrescueplan, stimuluspackage, covidrelief, caresact, covidreliefbill, stimulusbill</td>
                <td>28</td>
              </tr>
              <tr valign="top">
                <td>Economy</td>
                <td>economy, housing, homelessness, unemployed, markets, debt, economic, evictionmoratorium, jobsreport, housingcrisis</td>
                <td>26</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>(4) News and politics</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ron DeSantis</td>
                <td>deathsantis, desantis, rondesantis, gregabbott, deathdesantis, desantisfailedflorida, floridacovidepicenter, harriscounty, floriduh, desantisvariant</td>
                <td>58</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Trump Virus</td>
                <td>trumpvirus, trumpknew, trumpliesamericansdie, trumpfailedamerica, trumphasnoplan, trumpliedpeopledied, trumpisanidiot, trumpownseverydeath, trumpgate, trumpliespeopledie</td>
                <td>31</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>News</td>
                <td>foxnews, news, cnn, breakingnews, journalism, nytimes, abcnews, nyt, newyorktimes, nbcnews</td>
                <td>31</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Andrew Cuomo</td>
                <td>cuomo, deblasio, killercuomo, andrewcuomo, governor, chriscuomo, fredo, cuomokilledgrandma, cuomocoverup, governorcuomo</td>
                <td>21</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Trump</td>
                <td>trump, donaldtrump, potus, whitehouse, realdonaldtrump, presidenttrump, pence, mikepence, potus45, donaldtrumpjr</td>
                <td>14</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Fake News</td>
                <td>fakenews, lies, factcheck, propaganda, misinformation, conspiracytheory, disinformation, mainstreammedia, factchecking, bantiktok</td>
                <td>14</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Other COVID-19–related topics</bold>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Health</td>
                <td>health, cancer, anxiety, depression, publichealth, hiv, diabetes, medicine, doctor, breastcancer</td>
                <td>219</td>
              </tr>
              <tr valign="top">
                <td colspan="2">School</td>
                <td>schools, schoolsreopening, schoolreopening, lausd, stayinformed, reopeningschools, nycdoe, publicschools, virtualuntilsafe, dpa</td>
                <td>17</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref> shows the trends in volume and relevance for COVID-19 for the selected topics and categories. Specifically, <xref rid="figure4" ref-type="fig">Figure 4</xref>A shows the volume of tweets for a topic (number of tweets that contain at least one of the topic hashtags), while <xref rid="figure4" ref-type="fig">Figure 4</xref>B shows the relevance of topics to COVID-19. There are several notable observations worth highlighting. First, the most tweeted topic changes over time. For instance, before February 2021, the most tweeted topic was “mandates.” Afterward, “vaccine” became the most tweeted and most relevant topic, with monthly discussions peaking in April 2021. This trend is positively correlated with changes in the number of vaccinated people in the United States. Second, for topics in the “news and politics” category, we found that the changes in the topic “Trump,” both in volume and COVID-19 relevance, are aligned with progress in the 2020 presidential election. The relevance of the “Trump” topic to COVID-19 reached its highest level in October 2020, only a few days before the 2020 presidential election day (November 3), when Donald Trump lost his reelection bid. Third, we observed that before 2022, the trend in the “misinformation” category generally matched the change in the number of new COVID-19 cases. Nevertheless, after 2022, there was a decline in both the volume and relevance scores across all but one topic (“vaccine”), although the number of new COVID-19 cases peaked in January 2022.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>The monthly trend in volume (A) and relevance to COVID-19 (B) for selected topics and categories. The black line indicates the number of monthly new COVID-19 cases in the United States. EUA: emergency use authorization.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e42985_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Urban Versus Rural Sentiment</title>
        <p><xref rid="figure5" ref-type="fig">Figure 5</xref> depicts the normalized urban and rural sentiments about COVID-19–related topics. We normalized urban and rural raw sentiment scores using the mean (SD) acquired from their baseline hashtag sets. For <italic>urban-w2v</italic>, the mean score was –4.58 (SD 5.84). For <italic>rural-w2v</italic>, the mean score was –11.02 (SD 7.43). Both urban and rural users exhibited negative sentiments for the majority of COVID-19–related topics. The only topic with a positive sentiment was “essential worker.” Both urban and rural users communicated weak negative sentiments (between –1 and 0) for the “mandates,” “vaccine,” and “health” topics. By contrast, both groups exhibited a strong negative sentiment (around –2) for topics in the “news and politics” and “misinformation” categories.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Overall normalized urban and rural sentiment toward COVID-19 and 20 selected topics. The category ID for each topic is shown to the left of the topic name. The error bars indicate 1 SD for the sentiment. The 3 additional topics at the bottom (separated by the dotted lines) are displayed to provide readers with some intuition into the degree of positivity (or negativity) represented by the sentiment score. The raw <italic>P</italic> values from the Welch <italic>t</italic> tests are shown in the right column; bold text indicates a statistically significant difference (<italic>P</italic>&#60;.05/20) after Bonferroni correction.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e42985_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>For topics related to COVID-19 prevention (ie, “vaccine” and “mandates”), we observed that rural users had a stronger negative sentiment than urban users. For the topics “misinformation” and “conspiracy theory,” we observed that urban users expressed much stronger negative feelings about the “covidiots” and “fake news” topics, while rural users tended to use adjectives with stronger negative sentiments when discussing “open America,” “plandemic,” and “Dr. Fauci.” Finally, for topics related to politics, we observed a clear political divide when comparing the urban and rural users on their sentiment toward political figures. Urban users wrote about Donald Trump and Ron DeSantis (the governor of Florida since January 2019)—both Republicans—with stronger negative sentiments, while rural users were more likely to criticize Andrew Cuomo (the governor of New York from 2011 to 2021)—a Democrat. These urban versus rural sentiment differences in prevention- and politics-related topics are statistically significant (<italic>P</italic>&#60;.001 with Bonferroni correction). This finding seems to align with the growing political divide between urban and rural America. Urban areas tend to be more liberal, with voters supporting Democrats, whereas rural areas tend to be more conservative, supporting Republicans [<xref ref-type="bibr" rid="ref53">53</xref>].</p>
        <p>To gain further intuition into the degree of positivity (or negativity) represented by the sentiment scores, <xref rid="figure5" ref-type="fig">Figure 5</xref> includes 3 additional topics for comparison: “Christmas,” “Thanksgiving,” and “election 2020.” Among these 3 topics, “Christmas” and “Thanksgiving” had positive sentiments, ranging from 0.5 to 0.8, whereas “election 2020” had a negative sentiment of around –0.5.</p>
      </sec>
      <sec>
        <title>Topic Sentiment Temporal Trends</title>
        <p>The temporal trends for topic sentiment were characterized as monthly changes in sentiment. However, it should be noted that some topics and their hashtags only appeared in a certain month. For example, in the rural tweets, the hashtags associated with the “school” topic only appeared in July and August of 2020. This may be due to the fact that school start dates in the United States are typically in late August. As a result, we removed the 11 topics with an insufficient number of hashtags or similar urban and rural sentiment trends; we present them in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. <xref rid="figure6" ref-type="fig">Figure 6</xref> depicts the monthly trend in the sentiments for the 9 remaining topics.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Monthly urban and rural sentiment regarding COVID-19–related topics. For each month, depicted on the x-axis, the center of a dot represents the sentiment value of the topic, while the size of the dot reflects the ratio of the volume of the topic’s current month’s tweets to the sum of the topic’s tweets for all months. The trend lines correspond to a locally weighted linear regression for urban core and small town/rural.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e42985_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>As shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>, the attitudes of urban and rural Twitter users regarding COVID-19 gradually became more negative in general. One of the exceptions, shown in the first row in <xref rid="figure6" ref-type="fig">Figure 6</xref>, was the “economy” topic, for which urban users appeared to transition from a negative to a positive sentiment. A possible reason for this change is the US economic recovery that started in late 2021 [<xref ref-type="bibr" rid="ref54">54</xref>]. The second row in <xref rid="figure6" ref-type="fig">Figure 6</xref> shows the sentiment trends for 3 celebrities. For the topic “Dr. Fauci,” December 2020 was a watershed moment in the public’s attitudes about him; this was when he accepted the offer to become the chief medical advisor to the Biden administration. Among politicians, rural users’ sentiments toward Donald Trump and Ron DeSantis were consistently higher than those of urban users. The temporal trends for sentiment about prevention-related topics are depicted in the third row in <xref rid="figure6" ref-type="fig">Figure 6</xref>, where urban and rural users show similar, gradually declining trends toward “vaccine” and “mandates.” While rural users had relatively stable sentiments toward the topic of “essential worker,” urban users’ sentiments slowly became negative.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>There are several notable findings of this investigation. First, we observed that urban and rural users evidently harbor different sentiments about certain COVID-19–related topics. In particular, urban users exhibited stronger negative sentiments about “covidiots,” “China virus,” “economy,” and “fake news.” By contrast, rural users showed stronger negative sentiments toward “plandemic,” “Dr. Fauci,” and prevention strategies (“vaccine” and “mandates”). These findings are consistent with those of prior investigations [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Callaghan and colleagues [<xref ref-type="bibr" rid="ref6">6</xref>] found that rural residents were less likely to “participate in several COVID-19-related preventive health behaviors,” and Chauhan and colleagues [<xref ref-type="bibr" rid="ref4">4</xref>] observed that rural residents were less concerned about the coronavirus. Moreover, we observed a clear political divide between urban and rural users through the sentiment analysis of 3 politicians. For instance, during the time window covered in this study, urban users viewed Andrew Cuomo more favorably than Donald Trump or Ron DeSantis, while the opposite could be said for rural users. These findings are also consistent with studies on political polarization [<xref ref-type="bibr" rid="ref55">55</xref>]. All of these findings provide evidence that, with our proposed model, social media data can be effectively leveraged to gain timely insight into the public understanding of and sentiment toward hot social events.</p>
        <p>At the same time, we believe that the approach for studying public sentiment introduced in this work has several benefits over prior methods. First, by combining the word-embedding models with sentiment-rich opinion adjective lexicons, users of this approach can conduct sentiment analysis in the learned semantic vector space. This allows users to directly infer the sentiments of a population group toward a topic. In comparison to tweet-level sentiment analysis, one advantage of this approach is that it does not require identifying COVID-19–related tweets by using either keyword filters or machine learning classifiers; thus, this approach is more robust against noise (eg, misspellings, synonyms, and abbreviations) in the online data. Second, unlike commonly used topic modeling techniques such as latent Dirichlet allocation, this new method uses word-embedding vector clustering to identify hashtags and topics of public interest, which works well on large amounts of noisy short-text data, such as in tweets. Third, while our approach was tailored for a sentiment analysis of COVID-19, we believe that the trained word-embedding models can be directly used for sentiment analysis of other social events without the hassle of a new round of data collection and labeling. For instance, our data collection period covers the time of the 2020 presidential election; thus, the trained model can be directly used for election-related sentiment analysis. Another possible application of this model would be to build a topic extraction and sentiment analysis platform where users can input any event of interest to obtain related topics and to infer the public’s sentiments about the event in rural or urban areas. Our learned word-embedding models are publicly available on GitHub [<xref ref-type="bibr" rid="ref56">56</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are several limitations to this study, which we believe serve as opportunities for future research. First, we relied on the tweets’ place attribute to obtain the users’ geolocation and to infer the users’ urban or rural status. This step is not completely accurate, as there are several “nonformatted” places in the collected tweets. The nonformatted places can be ambiguous, such as “McDonald’s,” or too general, such as “Iowa, USA.” Through a manual review of 200 randomly sampled tweets, we found 19 (10%) tweets with nonformatted place attributes. The geocoding results of tweets with a nonformatted place attribute may make our results less significant than the actual urban versus rural differences. Second, to quantify the sentiment of a particular group, our method requires training a word-embedding model for that group. Our method is less effective if the goal is to compare multiple social groups with different demographics. This issue may be resolved with word-embedding geometry [<xref ref-type="bibr" rid="ref57">57</xref>]: performing sentiment analysis of the subspace of the aspect of interest. Finally, it should be recognized that social media–based investigations can, at times, be limited by population sampling bias [<xref ref-type="bibr" rid="ref58">58</xref>], such that the results may not generalize to the entire US population. For example, it has been shown that Twitter users are more likely to be younger and lean politically to the left than the general public [<xref ref-type="bibr" rid="ref59">59</xref>]. However, we believe that when faced with emerging social issues, social media–based sentiment analysis can broadly indicate the public’s views, opinions, and needs. In other words, social media analysis can serve as a timely and complementary approach to inform policy making and resource allocation.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study introduces a novel approach to characterize the public’s sentiment about COVID-19 and related topics. By applying topic recognition and subsequent sentiment analysis, we discovered a clear difference between urban and rural users in their sentiments about COVID-19 prevention strategies, misinformation, politicians, and the economy. While these findings might not be representative of the sentiment of the American public more broadly, we believe that such investigations could help policy makers obtain a more comprehensive understanding of the sentiment differences between urban and rural areas on COVID-19 and related topics, so that more targeted deployment of epidemic prevention efforts can be made. Finally, we wish to highlight that our approach is not limited to COVID-19, and it can readily be extended to other topics of interest without additional data collection or model training.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Word Embedding Model Parameter Selection, Selection of Relevance Threshold of COVID-19 Related Hashtags, and Opinion Adjectives Selection.</p>
        <media xlink:href="jmir_v25i1e42985_app1.docx" xlink:title="DOCX File, 584 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">HDBSCAN</term>
          <def>
            <p>hierarchical density-based spatial clustering of applications with noise</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">RUCA</term>
          <def>
            <p>rural-urban commuting area</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">UMAP</term>
          <def>
            <p>uniform manifold approximation and projection</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>YL and ZY proposed the research idea, which was finalized by BM. YL and CN collected the data. YL and ZY designed and conducted the experiments. CY and ZW advised on the data analysis. YL drafted the manuscript. All authors revised and reviewed the final manuscript. This research was sponsored in part by the National Institutes of Health (grants RM1-HG009034, R01-HG006844, and U2COD023196).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>COVID Data Tracker</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2023-01-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://covid.cdc.gov/covid-data-tracker">https://covid.cdc.gov/covid-data-tracker</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>Covid-19 Dashboard for Rural America</article-title>
          <source>Daily Yonder</source>
          <access-date>2023-01-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dailyyonder.com/covid-19-dashboard-for-rural-america/">https://dailyyonder.com/covid-19-dashboard-for-rural-america/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cuadros</surname>
              <given-names>DF</given-names>
            </name>
            <name name-style="western">
              <surname>Branscum</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mukandavire</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>FD</given-names>
            </name>
            <name name-style="western">
              <surname>MacKinnon</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Dynamics of the COVID-19 epidemic in urban and rural areas in the United States</article-title>
          <source>Ann Epidemiol</source>
          <year>2021</year>
          <month>07</month>
          <volume>59</volume>
          <fpage>16</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33894385"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.annepidem.2021.04.007</pub-id>
          <pub-id pub-id-type="medline">33894385</pub-id>
          <pub-id pub-id-type="pii">S1047-2797(21)00063-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8061094</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chauhan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Salon</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 related attitudes and risk perceptions across urban, rural, and suburban areas in the United States</article-title>
          <source>Findings</source>
          <year>2021</year>
          <fpage>23714</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://findingspress.org/article/23714-covid-19-related-attitudes-and-risk-perceptions-across-urban-rural-and-suburban-areas-in-the-united-states"/>
          </comment>
          <pub-id pub-id-type="doi">10.32866/001c.23714</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alcendor</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>Targeting COVID vaccine hesitancy in rural communities in Tennessee: implications for extending the COVID-19 pandemic in the South</article-title>
          <source>Vaccines (Basel)</source>
          <year>2021</year>
          <month>11</month>
          <day>04</day>
          <volume>9</volume>
          <issue>11</issue>
          <fpage>1279</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=vaccines9111279"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/vaccines9111279</pub-id>
          <pub-id pub-id-type="medline">34835210</pub-id>
          <pub-id pub-id-type="pii">vaccines9111279</pub-id>
          <pub-id pub-id-type="pmcid">PMC8621887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Callaghan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lueck</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Trujillo</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Ferdinand</surname>
              <given-names>AO</given-names>
            </name>
          </person-group>
          <article-title>Rural and urban differences in COVID-19 prevention behaviors</article-title>
          <source>J Rural Health</source>
          <year>2021</year>
          <month>03</month>
          <volume>37</volume>
          <issue>2</issue>
          <fpage>287</fpage>
          <lpage>295</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33619836"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jrh.12556</pub-id>
          <pub-id pub-id-type="medline">33619836</pub-id>
          <pub-id pub-id-type="pmcid">PMC8013340</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Box-Steffensmeier</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Moses</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Meaningful messaging: Sentiment in elite social media communication with the public on the COVID-19 pandemic</article-title>
          <source>Sci Adv</source>
          <year>2021</year>
          <month>07</month>
          <volume>7</volume>
          <issue>29</issue>
          <fpage>eabg2898</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.science.org/doi/abs/10.1126/sciadv.abg2898?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/sciadv.abg2898</pub-id>
          <pub-id pub-id-type="medline">34261655</pub-id>
          <pub-id pub-id-type="pii">7/29/eabg2898</pub-id>
          <pub-id pub-id-type="pmcid">PMC8279499</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Green</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Edgerton</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Naftel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shoub</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cranmer</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Elusive consensus: Polarization in elite communication on the COVID-19 pandemic</article-title>
          <source>Sci Adv</source>
          <year>2020</year>
          <month>07</month>
          <volume>6</volume>
          <issue>28</issue>
          <fpage>eabc2717</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.science.org/doi/abs/10.1126/sciadv.abc2717?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/sciadv.abc2717</pub-id>
          <pub-id pub-id-type="medline">32923600</pub-id>
          <pub-id pub-id-type="pii">abc2717</pub-id>
          <pub-id pub-id-type="pmcid">PMC7455486</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haeder</surname>
              <given-names>SF</given-names>
            </name>
          </person-group>
          <article-title>Joining the herd? U.S. public opinion and vaccination requirements across educational settings during the COVID-19 pandemic</article-title>
          <source>Vaccine</source>
          <year>2021</year>
          <month>04</month>
          <day>22</day>
          <volume>39</volume>
          <issue>17</issue>
          <fpage>2375</fpage>
          <lpage>2385</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33810906"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2021.03.055</pub-id>
          <pub-id pub-id-type="medline">33810906</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(21)00348-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC7985928</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gadarian</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Pepinsky</surname>
              <given-names>TB</given-names>
            </name>
          </person-group>
          <article-title>Partisanship, health behavior, and policy attitudes in the early stages of the COVID-19 pandemic</article-title>
          <source>PLoS One</source>
          <year>2021</year>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>e0249596</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0249596"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0249596</pub-id>
          <pub-id pub-id-type="medline">33826646</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-38771</pub-id>
          <pub-id pub-id-type="pmcid">PMC8026027</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Warner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hsueh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>The power of the patient voice: learning indicators of treatment adherence from an online breast cancer forum</article-title>
          <source>Proceedings of the International AAAI Conference on Web and Social Media</source>
          <year>2017</year>
          <conf-name>Eleventh International AAAI Conference on Web and Social Media</conf-name>
          <conf-date>May 15-18, 2017</conf-date>
          <conf-loc>Montreal, QC</conf-loc>
          <fpage>337</fpage>
          <lpage>346</lpage>
          <pub-id pub-id-type="doi">10.1609/icwsm.v11i1.14892</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>The public perception of the #GeneEditedBabies event across multiple social media platforms: observational study</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <month>03</month>
          <day>11</day>
          <volume>24</volume>
          <issue>3</issue>
          <fpage>e31687</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/3/e31687/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/31687</pub-id>
          <pub-id pub-id-type="medline">35275077</pub-id>
          <pub-id pub-id-type="pii">v24i3e31687</pub-id>
          <pub-id pub-id-type="pmcid">PMC8957000</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kantarcioglu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vorobeychik</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>Biomedical research cohort membership disclosure on social media</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2019</year>
          <volume>2019</volume>
          <fpage>607</fpage>
          <lpage>616</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32308855"/>
          </comment>
          <pub-id pub-id-type="medline">32308855</pub-id>
          <pub-id pub-id-type="pmcid">PMC7153128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Sulieman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A systematic literature review of machine learning in online personal health data</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>06</month>
          <day>01</day>
          <volume>26</volume>
          <issue>6</issue>
          <fpage>561</fpage>
          <lpage>576</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30908576"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz009</pub-id>
          <pub-id pub-id-type="medline">30908576</pub-id>
          <pub-id pub-id-type="pii">5419782</pub-id>
          <pub-id pub-id-type="pmcid">PMC7647332</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Vorobeychik</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kantarcioglu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>Implicit incentives among Reddit users to prioritize attention over privacy and reveal their faces when discussing direct-to-consumer genetic test results: topic and attention analysis</article-title>
          <source>JMIR Infodemiology</source>
          <year>2022</year>
          <month>08</month>
          <day>03</day>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e35702</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://infodemiology.jmir.org/2022/2/e35702"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/35702</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <source>Omnicore Agency</source>
          <access-date>2022-08-04</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.omnicoreagency.com/twitter-statistics/">https://www.omnicoreagency.com/twitter-statistics/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chandrasekaran</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Valkunde</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Moustakas</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Topics, trends, and sentiments of tweets about the COVID-19 pandemic: temporal infoveillance study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>23</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e22624</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e22624/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22624</pub-id>
          <pub-id pub-id-type="medline">33006937</pub-id>
          <pub-id pub-id-type="pii">v22i10e22624</pub-id>
          <pub-id pub-id-type="pmcid">PMC7588259</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Luli</surname>
              <given-names>GK</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 vaccine-related discussion on Twitter: topic modeling and sentiment analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>06</month>
          <day>29</day>
          <volume>23</volume>
          <issue>6</issue>
          <fpage>e24435</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/6/e24435/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24435</pub-id>
          <pub-id pub-id-type="medline">34115608</pub-id>
          <pub-id pub-id-type="pii">v23i6e24435</pub-id>
          <pub-id pub-id-type="pmcid">PMC8244724</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Latent Dirichlet allocation</article-title>
          <source>J Mach Learn Res</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/pdf/10.5555/944919.944937"/>
          </comment>
          <pub-id pub-id-type="doi">10.5555/944919.944937</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>A biterm topic model for short texts</article-title>
          <source>Proceedings of the 22nd International Conference on World Wide Web</source>
          <year>2013</year>
          <conf-name>The 22nd International Conference on World Wide Web</conf-name>
          <conf-date>May 13-17, 2013</conf-date>
          <conf-loc>Rio de Janeiro, Brazil</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2488388.2488514</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Experimental explorations on short text topic mining between LDA and NMF based schemes</article-title>
          <source>Knowl Based Syst</source>
          <year>2019</year>
          <month>01</month>
          <volume>163</volume>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1016/j.knosys.2018.08.011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tajbakhsh</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Bagherzadeh</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Semantic knowledge LDA with topic vector for recommending hashtags: Twitter use case</article-title>
          <source>Intell Data Anal</source>
          <year>2019</year>
          <month>04</month>
          <day>29</day>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>609</fpage>
          <lpage>622</lpage>
          <pub-id pub-id-type="doi">10.3233/ida-183998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rustam</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kanwal</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mehmood</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>US based COVID-19 tweets sentiment analysis using TextBlob and supervised machine learning algorithms</article-title>
          <year>2021</year>
          <conf-name>2021 International Conference on Artificial Intelligence (ICAI)</conf-name>
          <conf-date>April 5-7, 2021</conf-date>
          <conf-loc>Islamabad, Pakistan</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icai52203.2021.9445207</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sattar</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Arifuzzaman</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 vaccination awareness and aftermath: public sentiment analysis on Twitter data and vaccinated population prediction in the USA</article-title>
          <source>Appl Sci</source>
          <year>2021</year>
          <month>06</month>
          <day>30</day>
          <volume>11</volume>
          <issue>13</issue>
          <fpage>6128</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/2076-3417/11/13/6128/pdf?version=1625726675"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/app11136128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeung</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Face off: polarized public opinions on personal face mask usage during the COVID-19 pandemic</article-title>
          <year>2020</year>
          <conf-name>2020 IEEE International Conference on Big Data</conf-name>
          <conf-date>December 10-13, 2020</conf-date>
          <conf-loc>Atlanta, GA</conf-loc>
          <fpage>10</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1109/bigdata50022.2020.9378114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>VADER: a parsimonious rule-based model for sentiment analysis of social media text</article-title>
          <source>Proceedings of the International AAAI Conference on Web and Social Media</source>
          <year>2014</year>
          <month>05</month>
          <day>16</day>
          <conf-name>International AAAI Conference on Web and Social Media</conf-name>
          <conf-date>June 1-4, 2014</conf-date>
          <conf-loc>Ann Arbor, MI</conf-loc>
          <fpage>216</fpage>
          <lpage>225</lpage>
          <pub-id pub-id-type="doi">10.1609/icwsm.v8i1.14550</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Twitter discussions and emotions about the COVID-19 pandemic: machine learning approach</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>11</month>
          <day>25</day>
          <volume>22</volume>
          <issue>11</issue>
          <fpage>e20550</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/11/e20550/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20550</pub-id>
          <pub-id pub-id-type="medline">33119535</pub-id>
          <pub-id pub-id-type="pii">v22i11e20550</pub-id>
          <pub-id pub-id-type="pmcid">PMC7690968</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Ramahi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Elnoshokaty</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>El-Gayar</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Nasralah</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wahbeh</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Public discourse against masks in the COVID-19 era: infodemiology study of Twitter data</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2021</year>
          <month>04</month>
          <day>05</day>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>e26780</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2021/4/e26780/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26780</pub-id>
          <pub-id pub-id-type="medline">33720841</pub-id>
          <pub-id pub-id-type="pii">v7i4e26780</pub-id>
          <pub-id pub-id-type="pmcid">PMC8023378</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cotfas</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Delcea</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gherai</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Roxin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Unmasking people’s opinions behind mask-wearing during COVID-19 pandemic—a Twitter stance analysis</article-title>
          <source>Symmetry</source>
          <year>2021</year>
          <month>10</month>
          <day>21</day>
          <volume>13</volume>
          <issue>11</issue>
          <fpage>1995</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/2073-8994/13/11/1995/pdf?version=1635744832"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/sym13111995</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Monitoring depression trends on Twitter during the COVID-19 pandemic: observational study</article-title>
          <source>JMIR Infodemiology</source>
          <year>2021</year>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>e26769</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34458682"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26769</pub-id>
          <pub-id pub-id-type="medline">34458682</pub-id>
          <pub-id pub-id-type="pii">v1i1e26769</pub-id>
          <pub-id pub-id-type="pmcid">PMC8330892</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Glandt</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Khanal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Caragea</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Caragea</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Stance detection in COVID-19 tweets</article-title>
          <source>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</source>
          <year>2021</year>
          <conf-name>The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing</conf-name>
          <conf-date>August 2-5, 2021</conf-date>
          <conf-loc>Ratchathewi, Bangkok</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.127</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jelodar</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Orji</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Deep sentiment classification and topic discovery on novel coronavirus or COVID-19 online discussions: NLP using LSTM recurrent neural network approach</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2020</year>
          <month>10</month>
          <volume>24</volume>
          <issue>10</issue>
          <fpage>2733</fpage>
          <lpage>2742</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2020.3001216</pub-id>
          <pub-id pub-id-type="medline">32750931</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Last</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Litvak</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Twitter Data Augmentation for Monitoring Public Opinion on COVID-19 Intervention Measures</article-title>
          <source>Proceedings of the 1st Workshop on NLP for COVID-19 (Part 2) at EMNLP 2020</source>
          <year>2020</year>
          <conf-name>The 2020 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>November 16-18, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.nlpcovid19-2.19</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abd-Alrazaq</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alhuwail</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Househ</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hamdi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Top concerns of Tweeters during the COVID-19 pandemic: infoveillance study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>04</month>
          <day>21</day>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>e19016</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/4/e19016/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19016</pub-id>
          <pub-id pub-id-type="medline">32287039</pub-id>
          <pub-id pub-id-type="pii">v22i4e19016</pub-id>
          <pub-id pub-id-type="pmcid">PMC7175788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lwin</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sheldenkar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Global sentiments surrounding the COVID-19 pandemic on Twitter: analysis of Twitter trends</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>05</month>
          <day>22</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19447</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e19447/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19447</pub-id>
          <pub-id pub-id-type="medline">32412418</pub-id>
          <pub-id pub-id-type="pii">v6i2e19447</pub-id>
          <pub-id pub-id-type="pmcid">PMC7247466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vishwanath</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 Twitter dataset with latent topics, sentiments and emotions attributes</article-title>
          <source>ArXiv. Preprint posted online June 25</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2007.06954</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Berton</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Topic detection and sentiment analysis in Twitter content related to COVID-19 from Brazil and the USA</article-title>
          <source>Appl Soft Comput</source>
          <year>2021</year>
          <month>03</month>
          <volume>101</volume>
          <fpage>107057</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33519326"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.asoc.2020.107057</pub-id>
          <pub-id pub-id-type="medline">33519326</pub-id>
          <pub-id pub-id-type="pii">S1568-4946(20)30995-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC7832522</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cotfas</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Delcea</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Roxin</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ioanas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gherai</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Tajariol</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>The longest month: analyzing COVID-19 vaccination opinions dynamics from tweets in the month following the first vaccine announcement</article-title>
          <source>IEEE Access</source>
          <year>2021</year>
          <volume>9</volume>
          <fpage>33203</fpage>
          <lpage>33223</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&#38;arnumber=9354776"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/access.2021.3059821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Tracking social media discourse about the COVID-19 pandemic: development of a public coronavirus Twitter data set</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>05</month>
          <day>29</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19273</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e19273/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19273</pub-id>
          <pub-id pub-id-type="medline">32427106</pub-id>
          <pub-id pub-id-type="pii">v6i2e19273</pub-id>
          <pub-id pub-id-type="pmcid">PMC7265654</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <article-title>Rural-Urban Commuting Area Codes</article-title>
          <source>Economic Research Service, US Department of Agriculture</source>
          <access-date>2022-06-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ers.usda.gov/data-products/rural-urban-commuting-area-codes.aspx">https://www.ers.usda.gov/data-products/rural-urban-commuting-area-codes.aspx</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Onega</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Alford-Teaster</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Goodrich</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eliassen</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Concordance of rural-urban self-identity and zip code-derived rural-urban commuting area (RUCA) designation</article-title>
          <source>J Rural Health</source>
          <year>2020</year>
          <month>03</month>
          <volume>36</volume>
          <issue>2</issue>
          <fpage>274</fpage>
          <lpage>280</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30913340"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jrh.12364</pub-id>
          <pub-id pub-id-type="medline">30913340</pub-id>
          <pub-id pub-id-type="pmcid">PMC6763368</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hailu</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Guidelines for Using Rural-Urban Classification Systems for Community Health Assessment</article-title>
          <source>Washington State Department of Health</source>
          <year>2016</year>
          <access-date>2022-06-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doh.wa.gov/sites/default/files/legacy/Documents/1500//RUCAGuide.pdf">https://doh.wa.gov/sites/default/files/legacy/Documents/1500//RUCAGuide.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Discover breaking events with popular hashtags in Twitter</article-title>
          <source>CIKM '12: Proceedings of the 21st ACM international conference on information and knowledge management</source>
          <year>2012</year>
          <conf-name>21st ACM International Conference on Information and Knowledge Management</conf-name>
          <conf-date>October 29-November 2, 2012</conf-date>
          <conf-loc>Maui, HI</conf-loc>
          <fpage>1794</fpage>
          <lpage>1798</lpage>
          <pub-id pub-id-type="doi">10.1145/2396761.2398519</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McInnes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Healy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saul</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Großberger</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>UMAP: uniform manifold approximation and projection</article-title>
          <source>J Open Source Softw</source>
          <year>2018</year>
          <month>09</month>
          <volume>3</volume>
          <issue>29</issue>
          <fpage>861</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.theoj.org/joss-papers/joss.00861/10.21105.joss.00861.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.21105/joss.00861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hinneburg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Keim</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Van den Bussche</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vianu</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>On the surprising behavior of distance metrics in high dimensional space</article-title>
          <source>Database Theory — ICDT 2001. Lecture Notes in Computer Science</source>
          <year>2001</year>
          <publisher-loc>Berlin, Germany</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>420</fpage>
          <lpage>434</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McInnes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Healy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Astels</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>hdbscan: Hierarchical density based clustering</article-title>
          <source>J Open Source Softw</source>
          <year>2017</year>
          <month>03</month>
          <volume>2</volume>
          <issue>11</issue>
          <fpage>205</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.theoj.org/joss-papers/joss.00205/10.21105.joss.00205.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.21105/joss.00205</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moulavi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jaskowiak</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Campello</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zimek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sander</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Density-based clustering validation</article-title>
          <source>Proceedings of the 2014 SIAM International Conference on Data Mining (SDM)</source>
          <year>2014</year>
          <conf-name>The 2014 SIAM International Conference on Data Mining</conf-name>
          <conf-date>April 24-26, 2014</conf-date>
          <conf-loc>Philadelphia, PA</conf-loc>
          <fpage>839</fpage>
          <lpage>847</lpage>
          <pub-id pub-id-type="doi">10.1137/1.9781611973440.96</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaibi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nfaoui</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Satori</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A Comparative Evaluation of Word Embeddings Techniques for Twitter Sentiment Analysis</article-title>
          <year>2019</year>
          <conf-name>International Conference on Wireless Technologies, Embedded and Intelligent Systems (WITS)</conf-name>
          <conf-date>April 3-4, 2019</conf-date>
          <conf-loc>Fez, Morocco</conf-loc>
          <fpage>1</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1109/wits.2019.8723864</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caliskan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bryson</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Narayanan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Semantics derived automatically from language corpora contain human-like biases</article-title>
          <source>Science</source>
          <year>2017</year>
          <month>04</month>
          <day>14</day>
          <volume>356</volume>
          <issue>6334</issue>
          <fpage>183</fpage>
          <lpage>186</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://core.ac.uk/reader/161916836?utm_source=linkout"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/science.aal4230</pub-id>
          <pub-id pub-id-type="medline">28408601</pub-id>
          <pub-id pub-id-type="pii">356/6334/183</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garg</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Schiebinger</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jurafsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Word embeddings quantify 100 years of gender and ethnic stereotypes</article-title>
          <source>Proc Natl Acad Sci USA</source>
          <year>2018</year>
          <month>04</month>
          <day>17</day>
          <volume>115</volume>
          <issue>16</issue>
          <fpage>E3635</fpage>
          <lpage>E3644</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29615513"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1720347115</pub-id>
          <pub-id pub-id-type="medline">29615513</pub-id>
          <pub-id pub-id-type="pii">1720347115</pub-id>
          <pub-id pub-id-type="pmcid">PMC5910851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baccianella</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Esuli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sebastiani</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>SentiWordNet 3.0: An Enhanced Lexical Resource for Sentiment Analysis and Opinion Mining</article-title>
          <source>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC '10)</source>
          <year>2010</year>
          <conf-name>The Seventh International Conference on Language Resources and Evaluation</conf-name>
          <conf-date>May 17-23, 2010</conf-date>
          <conf-loc>Valletta, Malta</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <article-title>Data Access - Urban Rural Classification Scheme for Counties</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2019</year>
          <access-date>2022-08-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/nchs/data_access/urban_rural.htm">https://www.cdc.gov/nchs/data_access/urban_rural.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maxwell</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Why are urban and rural areas so politically divided?</article-title>
          <source>Washington Post</source>
          <year>2019</year>
          <month>03</month>
          <day>05</day>
          <access-date>2022-12-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.washingtonpost.com/politics/2019/03/05/why-are-urban-rural-areas-so-politically-divided/">https://www.washingtonpost.com/politics/2019/03/05/why-are-urban-rural-areas-so-politically-divided/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <article-title>The Data Underlying America's Strong Economic Recovery</article-title>
          <source>US Department of the Treasury</source>
          <access-date>2022-08-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://home.treasury.gov/news/featured-stories/the-data-underlying-americas-strong-economic-recovery">https://home.treasury.gov/news/featured-stories/the-data-underlying-americas-strong-economic-recovery</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gimpel</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Lovin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Moy</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Reeves</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The urban–rural gulf in American political behavior</article-title>
          <source>Polit Behav</source>
          <year>2020</year>
          <month>03</month>
          <day>05</day>
          <volume>42</volume>
          <issue>4</issue>
          <fpage>1343</fpage>
          <lpage>1368</lpage>
          <pub-id pub-id-type="doi">10.1007/s11109-020-09601-w</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="web">
          <article-title>COVID19-W2V</article-title>
          <source>GitHub</source>
          <access-date>2022-08-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/yongtai123/COVID19-W2V">https://github.com/yongtai123/COVID19-W2V</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kozlowski</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Taddy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>The geometry of culture: analyzing the meanings of class through word embeddings</article-title>
          <source>Am Sociol Rev</source>
          <year>2019</year>
          <month>09</month>
          <day>25</day>
          <volume>84</volume>
          <issue>5</issue>
          <fpage>905</fpage>
          <lpage>949</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/0003122419877135"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0003122419877135</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Understanding demographic and socioeconomic biases of geotagged Twitter users at the county level</article-title>
          <source>Cartogr Geogr Inf Sci</source>
          <year>2018</year>
          <month>02</month>
          <day>09</day>
          <volume>46</volume>
          <issue>3</issue>
          <fpage>228</fpage>
          <lpage>242</lpage>
          <pub-id pub-id-type="doi">10.1080/15230406.2018.1434834</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <article-title>Sizing Up Twitter Users</article-title>
          <source>Pew Research Center</source>
          <year>2019</year>
          <access-date>2022-12-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2019/04/24/sizing-up-twitter-users/">https://www.pewresearch.org/internet/2019/04/24/sizing-up-twitter-users/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
