<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e47508</article-id>
      <article-id pub-id-type="pmid">38294856</article-id>
      <article-id pub-id-type="doi">10.2196/47508</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Public Opinion About COVID-19 on a Microblog Platform in China: Topic Modeling and Multidimensional Sentiment Analysis of Social Media</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mondal</surname>
            <given-names>Himel</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Cohen</surname>
            <given-names>Odeya</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Liu</surname>
            <given-names>Hongbing</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hongliang</surname>
            <given-names>Qiu</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Feipeng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1263-2067</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Zixiang</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2994-1379</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>Qibei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>School of International Business</institution>
            <institution>Zhejiang International Studies University</institution>
            <addr-line>299 Liuhe Road</addr-line>
            <addr-line>Xihu District</addr-line>
            <addr-line>Hangzhou, 310030</addr-line>
            <country>China</country>
            <phone>86 13857659112</phone>
            <email>luqibei@zisu.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6730-1620</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Ji</surname>
            <given-names>Shaobo</given-names>
          </name>
          <degrees>PhD, Prof Dr</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0932-6758</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Chen</given-names>
          </name>
          <degrees>MSE</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1335-1892</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Modern Business Research Center</institution>
        <institution>Zhejiang Gongshang University</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Management and E-Business</institution>
        <institution>Zhejiang Gongshang University</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of International Business</institution>
        <institution>Zhejiang International Studies University</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Sprott School of Business</institution>
        <institution>Carleton University</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>General Manager's Office</institution>
        <institution>Hangzhou Gaojin Technology Co, Ltd</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Qibei Lu <email>luqibei@zisu.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>1</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e47508</elocation-id>
      <history>
        <date date-type="received">
          <day>23</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>20</day>
          <month>8</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>9</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>20</day>
          <month>12</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Feipeng Guo, Zixiang Liu, Qibei Lu, Shaobo Ji, Chen Zhang. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 31.01.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e47508" xlink:type="simple"/>
      <related-article related-article-type="correction-forward" xlink:title="This is a corrected version. See correction statement in:" xlink:href="https://www.jmir.org/2024/1/e57233" vol="26" page="e57233"> </related-article>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The COVID-19 pandemic raised wide concern from all walks of life globally. Social media platforms became an important channel for information dissemination and an effective medium for public sentiment transmission during the COVID-19 pandemic.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Mining and analyzing social media text information can not only reflect the changes in public sentiment characteristics during the COVID-19 pandemic but also help the government understand the trends in public opinion and reasonably control public opinion.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>First, this study collected microblog comments related to the COVID-19 pandemic as a data set. Second, sentiment analysis was carried out based on the topic modeling method combining latent Dirichlet allocation (LDA) and Bidirectional Encoder Representations from Transformers (BERT). Finally, a machine learning logistic regression (ML-LR) model combined with a sparse matrix was proposed to explore the evolutionary trend in public opinion on social media and verify the high accuracy of the model.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The experimental results show that, in different stages, the characteristics of public emotion are different, and the overall trend is from negative to positive.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The proposed method can effectively reflect the spatiotemporal characteristics of public opinion. The results provide theoretical support and practical reference in response to public health and safety events.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>social media public opinion</kwd>
        <kwd>microblog</kwd>
        <kwd>sentiment analysis</kwd>
        <kwd>topic modeling</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Due to the global influence of COVID-19, it has recently become a research hot spot in many fields [<xref ref-type="bibr" rid="ref1">1</xref>]. Public opinion on social media refers to the public opinion expressed on social media software as the communication platform and social issues as the main topic, including public cognition and attitude [<xref ref-type="bibr" rid="ref2">2</xref>]. Public opinion generated by public security and health emergencies such as infectious diseases spreads rapidly and is of an explosive nature [<xref ref-type="bibr" rid="ref3">3</xref>]. Reasonable control of public opinion played an important role in curbing public panic during the COVID-19 pandemic [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      <p>In recent years, social media platforms have gradually become the main information dissemination medium for the public [<xref ref-type="bibr" rid="ref5">5</xref>]. The use of social media to measure public attention has also been gradually applied to the research of acute infectious diseases, such as influenza A (H1N1) [<xref ref-type="bibr" rid="ref6">6</xref>]. Ahmad et al [<xref ref-type="bibr" rid="ref7">7</xref>] demonstrated a correlation between the daily number of COVID-19–related tweets and the daily number of COVID-19 cases and deaths in Iran and Turkey. Public knowledge was limited due to a lack of information about, inexperience with, and poor awareness of the nature of the virus. Text-based data on social media platforms contain rich information about public opinion and sentiment [<xref ref-type="bibr" rid="ref8">8</xref>]. As of September 2022, the number of monthly active users of microblogging platforms reached 584 million, with mobile use accounting for 95%, and the number of daily active users reached 253 million. Although social media platforms disseminate information, they also present changes in public emotions [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Text-based data on social media provide accurate geolocations, rich emotional information, and distinct topics; the different topic information within social media text data is an important data source for public opinion research [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      <p>Most traditional spatiotemporal studies of public opinion use the administrative region as the analysis unit or the coordinates of users' blogs or comments to analyze the geographic density [<xref ref-type="bibr" rid="ref12">12</xref>]. Box-Steffensmeier and Moses [<xref ref-type="bibr" rid="ref13">13</xref>] explored the level of sentiment and severity of the pandemic in each state in the United States during the COVID-19 pandemic; however, they used only the administrative region as the unit of analysis, and the scope of analysis was limited. Liu et al [<xref ref-type="bibr" rid="ref14">14</xref>] conducted spatiotemporal analysis of the evolution of public opinion during the COVID-19 pandemic based on microblog data and visualized public sentiment and the number of microblogs in provinces and cities across the country. Although the research scope was expanded to the whole country, the selected spatial units were relatively concentrated and were not able to reflect the differences in public opinion at different scales. The spread of the pandemic has obvious regional differences between cities. Liu and Liu [<xref ref-type="bibr" rid="ref15">15</xref>] used the GeoSEn geoparser to obtain geographic location information from text data and converted it into coordinates for emotional spatial analysis, but the monitoring location was not accurate, which meant the analytic results deviated from the spatial standard. In particular, the results from administrative units at different levels are quite different, while there is a small number of microblogs with user location coordinates, which leads to further errors in the results. In addition, the propagation of public opinion on social media is regionally correlated. The scope of public concern is not limited to local areas but also includes information from other adjacent areas, which cannot be effectively mined by traditional public opinion analysis methods [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      <p>Social media data published by users contain rich geographic information, and many scholars have used social media to simulate and predict real events, such as predicting the spread of influenza, detecting earthquakes, and monitoring air quality. To monitor public opinion, Hu et al [<xref ref-type="bibr" rid="ref17">17</xref>] proposed a model combining latent Dirichlet allocation (LDA) based on the document generation model and the k-means text classification algorithm based on genetic optimization; this combination can improve the accuracy of the clustering algorithm to monitor public opinion. Specifically, this method can identify and track topics related to public opinion on Twitter as well as mine the public’s emotional characteristics in different fields. Zhang et al [<xref ref-type="bibr" rid="ref18">18</xref>] studied algorithms to monitor a public opinion network, compared and analyzed the advantages and disadvantages of different text classification and emotional tendency algorithms, and discussed future trends in algorithm development to monitor public panic during the COVID-19 pandemic in different geographical locations. Microblogs contain emotional information from social users. Researchers such as Jang et al [<xref ref-type="bibr" rid="ref19">19</xref>] have analyzed microblog data to determine the spatial and temporal variations in emotions expressed by Twitter users in North America. They also analyzed values given to emotions and the relationships between the influencing factors, which highlighted the study area index and emotional score. In their study, the spatiotemporal variation in the public's attention to the pandemic and emotion was discussed. Storey and O'Leary [<xref ref-type="bibr" rid="ref20">20</xref>] constructed a global sentiment map of Twitter users and analyzed the spatiotemporal characteristics of urban sentiment by calculating the sentiment information from social media text-based data. The risk of pandemic spread at multiple time nodes was evaluated by converting geographic information from text into coordinates for emotional spatial analysis. Alhashmi et al [<xref ref-type="bibr" rid="ref21">21</xref>] proposed a sentiment classification network model combining a part-of-speech attention mechanism and a long short-term memory network, which can fully mine the relationship between emotional polarity words and emotional target words of sentences and analyze the correlation between the spatial distribution of online public opinion and high-risk areas of the pandemic through spatiotemporal migration.</p>
      <p>This study aimed to mine social media comments on COVID-19 topics and explore their spatiotemporal distribution characteristics. The specific research period was from July 2020 to June 2021. More than 100,000 comments from 12 People's Daily microblogs were obtained through the microblog application programming interface (API), and 2000 samples were screened for feature extraction and sentiment analysis. On this basis, we proposed 3 stages of the pandemic covering the changes in public opinion. Meanwhile, through topic modeling, 12 core topics were identified to map the potential drivers of these changes. The purpose of this study was to help government and public health security departments understand the public's emotions and views on the COVID-19 pandemic, formulate more reasonable pandemic prevention policies, effectively respond to the COVID-19 pandemic, and maintain social stability.</p>
      <p>The rest of the paper is structured as follows: The Methods section introduces the research ideas and key algorithms used in this study. The Results section extends the analysis of the experimental results. The Discussion section concludes the paper with contributions, limitations, and directions for future research.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>First, based on the API for the Sina microblog, data from COVID-19–related microblog comments from July 1, 2020, to June 1, 2021, which was in the middle of the COVID-19 pandemic, were obtained and preprocessed. Second, we considered the characteristic of low granularity as well as short words in the microblog comments [<xref ref-type="bibr" rid="ref22">22</xref>]. Based on the SnowNLP Chinese sentiment vocabulary ontology, the Dalian University of Technology sentiment classification dictionary [<xref ref-type="bibr" rid="ref23">23</xref>] was optimized to identify sentiment features, and a multidimensional analysis model of public opinion on social media during the pandemic was constructed from the perspective of spatiotemporal correlation. Third, according to the multiscale division method of public opinion, the term frequency-inverse document frequency (TF-IDF) method was used to explore the semantic features of public opinion. Fourth, according to the difference in public opinion data characteristics, a model combining LDA and Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref24">24</xref>] was used to classify the topics. The number of topics was determined according to minimum perplexity, and the topic weight was established using chi-square test results. Finally, a machine learning logistic regression (ML-LR) sparse matrix fusion model was used to analyze the evolutionary trend in public opinion combined with the monthly pandemic notification information. The technical route and structure of this paper are shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The system architecture of this study. LDA: latent Dirichlet allocation; ML-LR: machine learning logistic regression; TF-IDF: term frequency-inverse document frequency.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e47508_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Extraction</title>
        <p>This study collected data for topics related to COVID-19 in the monthly hot topic search of the People's Daily microblogs from July 2020 to June 2021, including the hot topic name, topic search volume, and date of the blog post with the most searched topic, and selected more than 100,000 comments from 12 microblogs. These 12 blogs have a high degree of heat (eg, likes, comments, and retweets), and the blog topics represent the hot events of the corresponding timeline, which can be regarded as a microcosm of the different development stages of the COVID-19 pandemic in China. The specific data from these 12 microblogs are shown in <xref ref-type="table" rid="table1">Table 1</xref>. Based on the ArcGIS interface of the geocoding package Geocoder, the geographic latitude and longitude of the check-in location coordinate system were then obtained, and the IP address geographic location information of the user was calculated. Through the Requests library of the Python language, the data interface was requested and parsed, and 2000 comments were screened for sentiment analysis. An example of the crawled microblog data is shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Data information statistics of 12 representative blogs about the COVID-19 pandemic in China from July 2020 to June 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="490"/>
            <col width="160"/>
            <col width="90"/>
            <col width="110"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Topic</td>
                <td>Date</td>
                <td>Likes, n</td>
                <td>Comments, n</td>
                <td>Forwards, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>127 new cases were confirmed in 31 provinces</td>
                <td>July 31, 2020</td>
                <td>113,000</td>
                <td>11,000</td>
                <td>1453</td>
              </tr>
              <tr valign="top">
                <td>Three new imported cases were reported in Wuhan</td>
                <td>August 2, 2020</td>
                <td>62,000</td>
                <td>4987</td>
                <td>1015</td>
              </tr>
              <tr valign="top">
                <td>Yunnan released details of a new asymptomatic case</td>
                <td>September 1, 2020</td>
                <td>56,000</td>
                <td>2973</td>
                <td>811</td>
              </tr>
              <tr valign="top">
                <td>Qingdao has collected 1.03 million samples for nucleic acid testing</td>
                <td>October 12, 2020</td>
                <td>61,000</td>
                <td>3257</td>
                <td>1580</td>
              </tr>
              <tr valign="top">
                <td>Wu Zunyou said cold chain imports may become the source of the pandemic in China</td>
                <td>November 11, 2020</td>
                <td>30,000</td>
                <td>2464</td>
                <td>3072</td>
              </tr>
              <tr valign="top">
                <td>A COVID-19 vaccine has been approved for marketing in China</td>
                <td>December 31, 2020</td>
                <td>720,000</td>
                <td>40,000</td>
                <td>93,000</td>
              </tr>
              <tr valign="top">
                <td>Zhong Nanshan thanks you for not going home during the Spring Festival</td>
                <td>January 25, 2021</td>
                <td>113,000</td>
                <td>8871</td>
                <td>6025</td>
              </tr>
              <tr valign="top">
                <td>China's medium and high-risk areas are cleared today</td>
                <td>February 22, 2021</td>
                <td>8935</td>
                <td>1093</td>
                <td>1498</td>
              </tr>
              <tr valign="top">
                <td>Zhong Nanshan says there are dangers in not getting vaccinated in time</td>
                <td>March 31, 2021</td>
                <td>453,000</td>
                <td>37,000</td>
                <td>13,000</td>
              </tr>
              <tr valign="top">
                <td>Cluster infection occurred among Chinese students</td>
                <td>April 30, 2021</td>
                <td>6697</td>
                <td>673</td>
                <td>235</td>
              </tr>
              <tr valign="top">
                <td>The confirmed cases in Liwan, Guangzhou were infected with Indian variant strain</td>
                <td>May 23, 2021</td>
                <td>270,000</td>
                <td>10,000</td>
                <td>5009</td>
              </tr>
              <tr valign="top">
                <td>Zhong Nanshan said the concept of close connection should be updated</td>
                <td>June 26, 2021</td>
                <td>101,000</td>
                <td>4134</td>
                <td>15,000</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Sample comment information about the COVID-19 pandemic in China crawled from a microblog platform from July 2020 to June 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="150"/>
            <col width="610"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>ID</td>
                <td>Date</td>
                <td>Review</td>
                <td>IP</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>User 1</td>
                <td>October 15, 2020</td>
                <td>It is hoped that the test reagents can be distributed to every city in the country, whether developed or backward. The more backward provinces are the bigger the crisis, the news is closed, and the awareness of prevention is weak</td>
                <td>Jilin Province</td>
              </tr>
              <tr valign="top">
                <td>User 2</td>
                <td>January 24, 2021</td>
                <td>The most frightening thing is that the virus is still in its initial stage, and how it will mutate in the future is uncertain. What if it becomes more deadly</td>
                <td>Guangdong Province</td>
              </tr>
              <tr valign="top">
                <td>User 3</td>
                <td>April 8, 2021</td>
                <td>We call on the government to strictly control the price of masks and crack down on the bad behavior of price gouging by bad businesses in special periods</td>
                <td>Beijing City</td>
              </tr>
              <tr valign="top">
                <td>User 4</td>
                <td>June 17, 2021</td>
                <td>With such attention and rapid action and scientific prevention and treatment, the country will be able to effectively control and defeat the COVID-19 pandemic</td>
                <td>Sichuan Province</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <p>Issues with the crawled text data included missing column values, numerical anomalies, and special symbols, which needed to be supplemented and filtered. First, the Jieba word segmentation tool was used to process the text segmentation, and a custom dictionary was established for the high-frequency words and proper nouns related to the pandemic. Second, the Trie tree structure was used for efficient word graph scanning, and a stop word list was established to filter out the noisy data from the text. Third, the text information was converted into vectors (ie, feature engineering transformation). The model can accept only numerical input (such as vectors), not text; therefore, before running the model, any signal needed to be converted into a digital signal (eg, number, vector, matrix, tensor) that can be recognized by the model. The regular word segmentation result was used as the input for the model. The data preprocessing results are shown in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of a sample of annotated texts about COVID-19 in China before and after data preprocessing from July 2020 to June 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="460"/>
            <col width="460"/>
            <thead>
              <tr valign="top">
                <td>Number</td>
                <td colspan="2">Review</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Before preprocessing</td>
                <td>After preprocessing</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Now the pandemic is spreading, the whole network selling masks are doubling the price! Really no conscience!</td>
                <td>[pandemic], [spread], [sell], [mask], [price rise], [no], [conscience]</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>We should praise the medical staff and scientific researchers who are still struggling in the front line!</td>
                <td>[for], [struggle], [medical care], [scientific research], [personnel], [praise]</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>You don't need to panic, but you do need to be vigilant. If you notice suspicious symptoms, go to the hospital.</td>
                <td>[don't], [in], [but], [vigilant], [if], [notice], [symptoms], [to], [hospital], [medical]</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>The specific text data preprocessing process included (1) removing duplicate data, (2) removing useless characters, (3) using Jieba word segmentation, (4) removing stop words, (5) removing low-frequency words, (6) removing any tags such as HTML, and (7) using the decision tree method to supplement the missing column values and integrate the data.</p>
      </sec>
      <sec>
        <title>Sentiment Classification</title>
        <p>To reflect the characteristics of public opinion, a method based on dictionary and thesaurus matching was used. The Dalian University of Technology sentiment dictionary was used to scan the strings in the dictionary one by one and provide auxiliary annotations for sentiment classification. This method combines the characteristics of the topic and Chinese grammar and expands the original dictionary from the dimensions of “word,” “part of speech,” “word sense number,” “sentiment classification,” “intensity,” and “polarity.”</p>
        <p>The basic dictionary selected for this paper was the emotional vocabulary library of the Dalian University of Technology, which divides emotions into 7 categories (“anger,” “disgust,” “fear,” “sadness,” “surprise,” “good,” “happy”) and 21 subcategories. The initial emotional intensity was set to 5 levels (1, 3, 5, 7, 9), which is more detailed than other dictionaries. To facilitate sentiment calculation by a computer, we divided the polarity of sentiment words into 2 categories: positive (1) and negative (0). The formula for word sentiment is given in Equation 1:</p>
        <disp-formula><italic>s</italic>(<italic>w</italic>)=<italic>v</italic>(<italic>w</italic>)<italic>p</italic>(<italic>w</italic>) <bold>(1)</bold></disp-formula>
        <p>where <italic>s</italic>(<italic>w</italic>) represents the sentiment value of the term, <italic>v</italic>(<italic>w</italic>) is the sentiment intensity of the word, and <italic>p</italic>(<italic>w</italic>) denotes the sentiment polarity of the word.</p>
        <p>First, we needed to calculate the TF-IDF values of the words in the text to obtain the feature matrix. The TF-IDF value <inline-graphic xlink:href="jmir_v26i1e47508_fig7.png" xlink:type="simple" mimetype="image"/> of the feature item <italic>w</italic><sub>ij</sub> was calculated as shown in Equations 2-5:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e47508_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Here, <italic>w</italic><sub>ij</sub> represents the <italic>j</italic><sub>th</sub> word appearing in the microblog text-based data from the <italic>i</italic><sub>th</sub> day. <italic>C</italic>(<italic>w</italic><sub>ij</sub>) is the number of occurrences of the term <italic>w</italic><sub>ij</sub>. <italic>D</italic> is the total number of documents in the daily microblog text-based data, and &#124;<italic>D</italic><sub>i</sub>&#124; is the number of words in a document <italic>D</italic><sub>i</sub>. <inline-graphic xlink:href="jmir_v26i1e47508_fig9.png" xlink:type="simple" mimetype="image"/> denotes the bag-of-words vector of the document <italic>D</italic><sub>i</sub>. The function <italic>I</italic>(<italic>w</italic><sub>ij</sub>,<italic>D</italic><sub>i</sub>) takes the value 1 or 0, where 1 means that the document <italic>D</italic><sub>i</sub> contains the word <italic>w</italic><sub>ij</sub>; otherwise, it takes the value 0. The part-of-speech and sentiment classification of the words are shown in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Part-of-speech and sentiment classification of words in the text of a sample of microblog comments with the topic of COVID-19 pandemic behavior in China from July 2020 to June 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="210"/>
            <col width="130"/>
            <col width="190"/>
            <col width="80"/>
            <col width="70"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td>Word</td>
                <td>Part of speech</td>
                <td>Sentiment classification</td>
                <td>Intensity</td>
                <td>Polarity</td>
                <td>Auxiliary classification</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Anger</td>
                <td>Adjective</td>
                <td>NAU<sup>a</sup></td>
                <td>7</td>
                <td>–1</td>
                <td>NaN<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Fear</td>
                <td>Adjective</td>
                <td>NI<sup>c</sup></td>
                <td>5</td>
                <td>–1</td>
                <td>NG<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>Grief</td>
                <td>Verb</td>
                <td>NB<sup>e</sup></td>
                <td>7</td>
                <td>–1</td>
                <td>NJ<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td>Surprise</td>
                <td>Adjective</td>
                <td>PC<sup>g</sup></td>
                <td>6</td>
                <td>1</td>
                <td>NaN</td>
              </tr>
              <tr valign="top">
                <td>Hope</td>
                <td>Noun</td>
                <td>PD<sup>h</sup></td>
                <td>4</td>
                <td>1</td>
                <td>PH<sup>i</sup></td>
              </tr>
              <tr valign="top">
                <td>Pleasure</td>
                <td>Verb</td>
                <td>PA<sup>j</sup></td>
                <td>4</td>
                <td>1</td>
                <td>PE<sup>k</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>NAU: anger.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>NaN: null value.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>NI: panic.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>NG: shame.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>NB: sadness.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>NJ: disappointment.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>PC: surprise.</p>
            </fn>
            <fn id="table4fn8">
              <p><sup>h</sup>PD: respect.</p>
            </fn>
            <fn id="table4fn9">
              <p><sup>i</sup>PH: praise.</p>
            </fn>
            <fn id="table4fn10">
              <p><sup>j</sup>PA: happiness.</p>
            </fn>
            <fn id="table4fn11">
              <p><sup>k</sup>PE: peace of mind.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Feature Extraction</title>
        <p>This study used a topical term extraction method based on the combination of LDA and BERT. First, the review text was segmented into words, and the word vector was generated based on the hybrid pretrained model. The k-means clustering method was then used to cluster the word vectors, and the words with higher word frequencies were selected as representative words in each class. At the same time, other words in the same class were replaced by representative words to reduce the amount of input data. Finally, the biterm topic model (BTM) for short text was used to extract the topics.</p>
        <p>The BTM is aimed at the characteristics of short text and extracts more informative topics using co-occurrence patterns of word pairs in the whole corpus. The modeling process of the BTM generates a corpus of word pairs. Model training and model parameter inference are then performed based on the generated corpus. Finally, the topic distribution and word distribution on the corpus are obtained. If we suppose there are <italic>M</italic> feature words, &#124;<italic>B</italic>&#124; word pairs, and <italic>K</italic> topics in the corpus, then the corpus-level topic distribution is denoted by <italic>θ</italic>, the distribution of topic <italic>k</italic> is denoted by <italic>θ</italic><sub>k</sub>, and the word distribution is denoted by Ø, as shown in Equation 6.</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e47508_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>z</italic> represents a topic extracted, <italic>p</italic>(<italic>z</italic>=<italic>k</italic>) is the probability of occurrence of topic <italic>k</italic>, and <italic>p</italic>(<italic>w</italic>&#124;<italic>z</italic>=<italic>k</italic>) is the probability of occurrence of words under topic <italic>k</italic>.</p>
        <p>Perplexity is widely used to measure the fitting effect of an LDA model. Perplexity is a probabilistic model evaluation metric that evaluates the predictive ability of a trained model on new unseen data. LDA perplexity is the perplexity when the model predicts new unseen text. After understanding the calculation of the statement probability, the perplexity of the statement <italic>s</italic>=<italic>w</italic><sub>1</sub>, <italic>w</italic><sub>2</sub>, <italic>w</italic><sub>3</sub>, ..., <italic>w</italic><sub>n</sub> can be defined as seen in Equation 7:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e47508_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>To calculate the perplexity of LDA, we needed to utilize the generation process of the model. Specifically, we needed to split the text data into training and test sets. The training set was used to build the LDA model, and the test set was used to calculate the perplexity of the model. In the test set, we treated each document as a sequence, taking one word at a time from the sequence and inferring the topic of that word based on the current model. Once the topic of the word was predicted, the model parameters were recalculated. In this process, we needed to calculate the perplexity of the text using the topic of the predicted word and the frequency of all the words.</p>
      </sec>
      <sec>
        <title>Sentiment Analysis</title>
        <p>The microblog comment text was divided into positive and negative sentiment, and the value represented the probability that the text contained that sentiment. The values ranged from 0 to 1; closer to 1 tended to be positive, and closer to 0 tended to be negative. To improve the accuracy of sentiment prediction, the sentiment classification model needed to be retrained. The main steps are described in the following paragraphs.</p>
        <p>First, the training method of the Bayesian model was used to train the sentiment classifier, and the classification method in the Bayesian classification was then used to predict the sentiment classification and test the accuracy of the model. Finally, the newly trained model was saved.</p>
        <p>The daily microblog text set is denoted as <italic>D</italic>={<italic>d</italic><sub>1</sub>, <italic>d</italic><sub>2</sub>, ..., <italic>d</italic><sub>m</sub>}, where <italic>m</italic> represents the number of microblogs crawled per day. The sentiment score <inline-graphic xlink:href="jmir_v26i1e47508_fig12.png" xlink:type="simple" mimetype="image"/> of each microblog was obtained using the newly trained sentiment analysis model. In the actual judgment, to make the visualization results more intuitive, the value of <inline-graphic xlink:href="jmir_v26i1e47508_fig12.png" xlink:type="simple" mimetype="image"/> was lowered by 1. That is, when the return value was in the interval of –1 to 0, the sentiment probability value was negative; when the return value was between 0 and 1, the sentiment probability value was positive. The sentiment score <italic>S</italic><sub>D</sub> was calculated as shown in Equation 8:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e47508_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>First, the feature words were inputted, and the sentiment polarity was classified as 1 (“positive”) or 0 (“negative”). Second, the TF-IDF calculation converted the words in the text into a word frequency matrix, and the matrix element <italic>a</italic>[<italic>i</italic>][<italic>j</italic>] represented the word frequency of <italic>j</italic> words in type <italic>i</italic> text. <italic>MemoryError</italic> was used to control the parameter. The transformer was then used to calculate the TF-IDF weight of each word. The first <italic>fit_transform</italic> was used to calculate the TF-IDF, and the second <italic>fit_transform</italic> was used to convert the text into a term frequency matrix to obtain all the words in the bag-of-words model. The TF-IDF matrix was then extracted. The element <italic>w</italic>[<italic>i</italic>][<italic>j</italic>] represented the TF-IDF weight of <italic>j</italic> word in the <italic>i</italic> type text, and the data were divided by combining the sparse matrix. Finally, the logistic regression classification method was used to calculate the model benchmark indicators. The binary classification ML-LR model is given in Equations 9 and 10:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e47508_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>w</italic> is the weight, <italic>b</italic> is the bias (<italic>b</italic> can be 0), and we can expand the weight and input vector a little, as shown in Equation 11:</p>
        <disp-formula><italic>w</italic>=(<italic>w</italic><sup>(1)</sup>, <italic>w</italic><sup>(2)</sup>, ..., <italic>w</italic><sup>(m)</sup>, <italic>b</italic>)<italic><sup>T</sup></italic>, <italic>X</italic>=(<italic>x</italic><sup>(1)</sup>, <italic>x</italic><sup>(2)</sup>, ..., <italic>x</italic><sup>(m)</sup>, 1)<italic><sup>T</sup></italic> <bold>(11)</bold></disp-formula>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study did not require ethics approval because all data collected were publicly available. There is no means within this paper or its supporting materials to establish the identification of users and their corresponding tweets.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Analysis of the Change in Public Opinion Based on LDA Topic Modeling</title>
        <p>We identified 12 key dates as turning points for mood scores during this period. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows the general trend in the monthly sentiment changes of microblog users during the COVID-19 pandemic. In general, the overall classification results show obvious clustering according to the time series.</p>
        <p>In the first stage (July 2020 to October 2020), strong infectious tendencies and unknown virus characteristics exacerbated public panic, and user sentiment ratings were generally low. In the second phase (November 2020 to February 2021), changes in sentiment scores were relatively stable, except for a large decrease in January 2021 and February 2021, possibly due to material shortages and population movement that accelerated virus transmission. In the third phase (March 2021 to June 2021), the proportion of positive emotions gradually increased thanks to the successful development and widespread distribution of vaccines. The data show that the changes in the number of infected people and the effectiveness of pandemic prevention measures were important factors affecting the change in the polarity of public sentiment.</p>
        <p>In the face of the pandemic, people’s instinctive psychological reaction was panic and anxiety, and these negative emotions were particularly obvious in the first stage. In the second stage, government departments strengthened the control of public opinion on social media. As a result, the public’s confidence about fighting the pandemic was significantly enhanced, and the emotional polarity gradually improved. In the third stage, the proportion of positive emotions increased significantly, and negative emotions gradually turned into affirmation and support for antipandemic actions. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the keywords in the monthly hot topic searches on the microblogs during the COVID-19 pandemic.</p>
        <p>The SnowNLP tool was used to calculate the sentiment value, and the text was classified according to the threshold. The LDA model was then used to mine the topics. The hyperparameters of the model were set as α=50/<italic>K</italic> and γ=0.01. The number of iterations was set at 1000. The number of potential topics was determined as <italic>K</italic>=12 according to the perplexity. The same weight was given to the topics with a monthly cycle by combining the change in the intensity of the influence of the propagation of the microblog information.</p>
        <p>We obtained 12 topics from the feature extraction of microblog topics during the COVID-19 pandemic (see <xref ref-type="table" rid="table5">Table 5</xref>). Among them, pandemic prevention and people’s livelihoods were the focus of public attention.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Monthly distribution of the sentiment polarity in microblog comments about the COVID-19 pandemic in China from July 2020 to June 2021.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e47508_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Monthly statistics of the keywords in the hot topic searches regarding COVID-19 pandemic behavior in China on microblogs from July 2020 to June 2021.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e47508_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Topic feature extraction analysis of microblog comments about the COVID-19 pandemic at different stages in China from July 2020 to June 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="160"/>
            <col width="240"/>
            <col width="570"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Stage and subject number</td>
                <td>Subject content</td>
                <td>Keywords extracted by the LDA<sup>a</sup> model</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Phase 1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T1</td>
                <td>New confirmed cases</td>
                <td>Pandemic rebound, death, persistence, fear, migration, virus spread, contagion risk, pandemic prevention</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T2</td>
                <td>Import from abroad</td>
                <td>Disinfect, detect, security, risk level, classification, source control, viral activity, personnel contact</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T3</td>
                <td>Asymptomatic infection</td>
                <td>Isolation, therapy, monitor, incubation personal, close contacts, medical observe, infectiousness, immunity</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T4</td>
                <td>Nucleic acid testing (NAT)</td>
                <td>24 hours, region, sampling method, screening, antigen detection, sampling point, information input, detection result</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Phase 2</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T5</td>
                <td>Cold chain food</td>
                <td>Traceability, check, disinfect, safety, risk, touch, nucleic acid testing, virus</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T6</td>
                <td>COVID-19 vaccine</td>
                <td>Clinical trials, safety, side effect, output, validity, develop, price, inoculate</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T7</td>
                <td>Pandemic prevention measures</td>
                <td>Face mask, health, disinfect, body temperature, nucleic acid testing, immunity, quarantine, vaccine</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T8</td>
                <td>Medium or high-risk areas</td>
                <td>Cases, asymptomatic infection, spread risk, medical insurance, living necessities, regional control, nucleic acid testing, health monitoring</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Phase 3</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T9</td>
                <td>Vaccination</td>
                <td>Vaccination rate, necessity, population immunity, adverse reactions, antibody, allergy, personal protection, inoculation contraindication</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T10</td>
                <td>Cluster infection</td>
                <td>Source of infection, chain of transmission, close contacts, gathering activity, nucleic acid testing, pandemic prevention awareness, route of transmission, policy control</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T11</td>
                <td>Variant strain</td>
                <td>Omicron, Delta, critically ill patients, panic, contagious, pathogenic, mortality rate, drug resistance</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>T12</td>
                <td>Close contact</td>
                <td>Centralized isolation, medical observation, nucleic acid testing, body temperature, abnormal signs, incubation period, clinical symptoms, movement tracking</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>LDA: latent Dirichlet allocation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p><xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows the results of topic modeling. In the first stage, the topics were mainly focused on pandemic prevention and control. In the second stage, in addition to vaccines, food safety became the focus. The core topics of the third stage were COVID-19 vaccination and variants.</p>
      </sec>
      <sec>
        <title>ML-LR Public Opinion Sentiment Analysis With a Sparse Matrix</title>
        <p>To explore the spatial distribution of public emotions, we subtracted the same types of emotions from provinces and regions in adjacent stages according to regional differences and obtained the spatiotemporal changes in negative emotions, as shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. Taking provincial administrative regions as the basic statistical unit and the 3 evolution stages as fixed time intervals, the natural break point method was used to divide the proportion of negative emotions.</p>
        <p>Neighboring provinces had similar emotional characteristics; that is, the spatiotemporal similarity of emotions was high. Moreover, the proportion of negative emotions was related to the severity of the local pandemic, and a high value for negative emotions had a spatial distribution similar to that of areas with a more severe pandemic situation. In addition, provinces with many confirmed cases had a high proportion of negative emotions, while provinces with a small number of confirmed cases had a relatively low proportion of negative emotions.</p>
        <p>Combined with China’s daily real-time notifications regarding the pandemic, the first stage of the pandemic in China occurred during the outbreak period, with the number of confirmed cases reaching a peak in August 2020 and the confirmed case rate gradually slowing down since then. As can be seen in <xref rid="figure5" ref-type="fig">Figure 5</xref>, during the period from July 2020 to January 2021, anger, disgust, fear, sadness, and other negative emotions prevailed. When combined with topic digging, this result shows that people’s concerns regarding the COVID-19 pandemic included fear of human-to-human transmission, a domestic outbreak, and imported cases from other countries.</p>
        <p>During the second stage (February 2021 to June 2021), the sentiment scores basically showed positive sentiment, indicating that the government adopted effective response policies and that public sentiment tended to stabilize again. Positive emotions such as surprise, good, and happy rapidly increased, indicating that the government invested considerable human and material resources at any cost. Therefore, the public was able to observe the determination and effectiveness of the country’s antipandemic efforts. In addition, with the arrival of the inflection point during the third phase of the pandemic, the mass distribution of COVID-19 vaccines increased public confidence that the pandemic could be prevented and controlled and that production, work, and prospects could resume after the pandemic.</p>
        <p>To visually present the words occurring at a high frequency for each emotion, we used a word cloud library to generate cloud maps of 70 words for 7 emotions corresponding to positive and negative emotions in the 3 stages, as shown in <xref ref-type="table" rid="table6">Table 6</xref>. At the same time, high-frequency words were combined with the corresponding current affairs hot spots to explore the differences in and connections of different semantic features in different sentiment classifications.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Geographical distribution of posts with negative sentiment by Chinese microblog users from July 2020 to June 2021: (A) phase 1, (B) phase 2, (C) phase 3.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e47508_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Bubble plot of the temporal trends in 7 sentiments based on microblog comments during the COVID-19 pandemic in China from July 2020 to June 2021.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e47508_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>The 7 emotional keywords based on COVID-19 pandemic topics found in microblog comment data in China from July 2020 to June 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="400"/>
            <col width="0"/>
            <col width="570"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Topic and keywords</td>
                <td>Number of times used</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Topic 1: happy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sanguine</td>
                <td colspan="2">474</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hope</td>
                <td colspan="2">399</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Happiness</td>
                <td colspan="2">307</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Relieved</td>
                <td colspan="2">301</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Believe</td>
                <td colspan="2">268</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pray</td>
                <td colspan="2">218</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Topic 2: good</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Health</td>
                <td colspan="2">535</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Security</td>
                <td colspan="2">490</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Safety</td>
                <td colspan="2">462</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Recovery</td>
                <td colspan="2">391</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Bless</td>
                <td colspan="2">307</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Great</td>
                <td colspan="2">274</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Topic 3: surprise</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unforeseen</td>
                <td colspan="2">367</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Vaccine</td>
                <td colspan="2">350</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Diagnose</td>
                <td colspan="2">329</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Outbreak</td>
                <td colspan="2">274</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pathogen</td>
                <td colspan="2">241</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cure</td>
                <td colspan="2">207</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Topic 4: sadness</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Epidemic</td>
                <td colspan="2">728</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Collapse</td>
                <td colspan="2">597</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hopelessness</td>
                <td colspan="2">582</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Despair</td>
                <td colspan="2">487</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Negative</td>
                <td colspan="2">424</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Chill</td>
                <td colspan="2">278</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Topic 5: fear</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Virus</td>
                <td colspan="2">624</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Infect</td>
                <td colspan="2">619</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Danger</td>
                <td colspan="2">481</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Trepidation</td>
                <td colspan="2">447</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Horror</td>
                <td colspan="2">410</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Helpless</td>
                <td colspan="2">368</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Topic 6: disgust</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rumor</td>
                <td colspan="2">394</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Oppose</td>
                <td colspan="2">350</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Keep away</td>
                <td colspan="2">311</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Shirk</td>
                <td colspan="2">263</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Back off</td>
                <td colspan="2">231</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Slackness</td>
                <td colspan="2">196</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Topic 7: anger</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hurt</td>
                <td colspan="2">368</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Indignation</td>
                <td colspan="2">359</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Conceal</td>
                <td colspan="2">301</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Forbid</td>
                <td colspan="2">248</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Malice</td>
                <td colspan="2">217</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ferocity</td>
                <td colspan="2">145</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>In the initial stage of the pandemic, the words occurring at a high frequency included “hopeless,” “danger,” “infect,” and other words as well as “negative,” “rumor,” “helpless,” and many other words that express emotions. In the face of the sudden COVID-19 pandemic, the public was in an extremely unstable emotional state.</p>
        <p>In the second stage of the pandemic, the words occurring at a high frequency included “keep away,” “pandemic,” and “pathogen.” Microblog topics focused on orderly prevention and control measures and a joint fight against the pandemic. The positive sentiment of netizens gradually increased, turning from fear to solidarity and cooperation.</p>
        <p>In the third stage of the pandemic, words such as “vaccine,” “recovery,” “hope,” and “sanguine” appeared frequently on the microblog. Most people were full of confidence and believed that China could win the battle against the pandemic, expressing feelings of positivity, unity, and hope for the future.</p>
      </sec>
      <sec>
        <title>Model Performance Analysis</title>
        <p>To verify that the proposed model and method had high accuracy, we calculated accuracy, precision, recall, and the <italic>F</italic><sub>1</sub>-score, as shown in Equations 12-15:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e47508_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>The words in the text were converted into a word frequency matrix, and the TF-IDF weight of each word was counted. The TF-IDF matrix was then extracted, and its weight was calculated. Finally, the effectiveness of the model was evaluated by extracting parameters (precision, recall, accuracy) from the confusion matrix, as shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>.</p>
        <p>First, the word frequency matrix was generated using the text-based data after Chinese word segmentation and data cleaning. The <italic>CountVectorizer</italic> class was called to calculate the word frequency matrix, and the generated matrix was <italic>X</italic>. Second, the <italic>TfidfTransformer</italic> class was called to calculate the TF-IDF value of the term frequency matrix <italic>X</italic>, and the <italic>Weight</italic> matrix was obtained. The <italic>Sklearn</italic> machine learning package was then called to perform the classification operation, the <italic>fit()</italic> function was called to train, and the predicted class labels were assigned to the <italic>pre</italic> array. Finally, we called the <italic>PCA()</italic> function of <italic>Sklearn</italic> to reduce these features to 2 dimensions corresponding to the <italic>X</italic> and <italic>Y</italic> axes, from which we could evaluate the algorithm.</p>
        <p>Building on the traditional binary classification confusion matrix, we extended the confusion matrix to 7 emotions according to the emotional characteristics. The matrix shows that very few data points were misclassified by the model. Moreover, the performance of the model in terms of accuracy, recall, and the <italic>F</italic><sub>1</sub>-score of the weighted average of the 7 emotions “anger,” “disgust,” “fear,” “sad,” “surprise,” “good,” and “happy” was good, as shown in <xref ref-type="table" rid="table7">Table 7</xref>.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Multiclass confusion matrix for sentiment scoring based on COVID-19 pandemic-related topics in microblog comments in China from July 2020 to June 2021.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e47508_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Performance analysis of a machine learning logistic regression (ML-LR) language processing model fusing a sparse matrix based on Chinese COVID-19 pandemic topics from July 2020 to June 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="320"/>
            <col width="130"/>
            <col width="110"/>
            <col width="120"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td>Measures</td>
                <td>Precision</td>
                <td>Recall</td>
                <td><italic>F</italic><sub>1</sub>-score</td>
                <td>Support</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Negative</td>
                <td>0.8447</td>
                <td>0.9525</td>
                <td>0.9189</td>
                <td>175</td>
              </tr>
              <tr valign="top">
                <td>Positive</td>
                <td>0.8158</td>
                <td>0.8961</td>
                <td>0.8344</td>
                <td>428</td>
              </tr>
              <tr valign="top">
                <td>Accuracy</td>
                <td>N/A<sup>a</sup></td>
                <td>N/A</td>
                <td>0.8581</td>
                <td>603</td>
              </tr>
              <tr valign="top">
                <td>Macro average</td>
                <td>0.8579</td>
                <td>0.6138</td>
                <td>0.6440</td>
                <td>603</td>
              </tr>
              <tr valign="top">
                <td>Weighted average</td>
                <td>0.7982</td>
                <td>0.7181</td>
                <td>0.7811</td>
                <td>603</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study constructed a multidimensional analysis model of public opinion on social media in China during the COVID-19 pandemic from the perspective of spatiotemporal correlations. We proposed an ML-LR model with a sparse matrix to analyze the evolutionary process of public opinion and interpret the dynamic relationship and multidimensional emotional characteristics of public opinion in different spatiotemporal environments. The results show that, due to the different trends in the pandemic situation and prevention and control efforts in different regions, there were differences in the emotional characteristics of public opinion. The amount of public opinion data at many different levels was similar in the temporal and spatial distributions, and the amount of public opinion data was positively correlated with the number of new cases. With the rapid spread of the COVID-19 pandemic, the monthly amount of public opinion data increased. As the COVID-19 pandemic was gradually controlled, the amount of monthly public opinion data showed a downward “zigzag” trend. The spatial distribution of the amount of public opinion data was positively correlated with the distribution of COVID-19 spread, and the provinces with more public opinion data were mostly those areas with a more serious COVID-19 situation. This study can provide theoretical support and a practical reference for government and public health safety departments to deal with public health emergencies.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>In traditional research of public opinion on social media, the topic model method is often combined with a text clustering algorithm [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref25">25</xref>], but this approach is limited by the problem of insufficiently labeled data, making it difficult to reflect the changes in public mood. This paper proposes an ML-LR model with a fusion coefficient matrix to overcome these problems. In addition, social media data embedded with geographical location information provides valuable evaluation indicators for the study of public opinion characteristics, but the time span and geographical location range selected by traditional research of public opinion on social media are of limited size or are not representative [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. This study captures the dynamic changes in public sentiment characteristics through multidimensional, spatiotemporal analysis of public opinion.</p>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <p>We will conduct further research regarding 2 aspects in the future. On one hand, when analyzing public sentiment during a pandemic, user comments may come from multiple social media platforms [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Subsequent research will consider adding data from other social media platforms, strive to include a more comprehensive user group, and describe more appropriate regional characteristics of public opinion. On the other hand, considering that the spread of the pandemic has certain geospatial heterogeneity under the joint influence of geographical proximity, transportation network, and pandemic prevention measures, taking provincial administrative regions as geospatial measurement units has certain limitations [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>In future research, we will discuss how to obtain a multiscale visual analysis unit of the pandemic and public opinion according to the superposition of the transmission mode of the COVID-19 pandemic and multiscale geographic space. We will also mine the process of propagation during the pandemic from a more granular scale in terms of time and space.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The analytic method of public opinion on social media proposed in this study can effectively reflect the characteristics of public opinion in different regions and different periods of time. It can also provide theoretical support and a practical reference for the analysis of public opinion in major public health events, as well as provide correct guidance for government departments and effective control of the propagation of network public opinion.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Visual analysis of the COVID-19 topic modeling results based on microblog comments from July 2020 to June 2021 in China.</p>
        <media xlink:href="jmir_v26i1e47508_app1.png" xlink:title="PNG File, 1905 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BTM</term>
          <def>
            <p>biterm topic model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ML-LR</term>
          <def>
            <p>machine learning logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">TF-IDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Social Science Fund of China (No. 23BGL306, No. 20BTQ067); the Basic and Public Welfare Research Project of Zhejiang Province, China (No. LGJ21G010001); the Soft Science Research Program of Zhejiang, China (No. 2022C35018); the Key R&#38;D Project in Zhejiang Province of China (No. 2022C01005, No. 2022C01144); and the Major Project of Key Research Base of Humanities and Social Sciences of Ministry of Education (No. 22JJD790082).</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets containing confirmed COVID-19 cases and microblog comments are publicly available on the official websites of the Chinese Center for Disease Control and Prevention and the Sina microblog.</p>
        <p>The coded data and filtered files are made available on request from the corresponding author.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Analysis of public opinion evolution of COVID-19 based on LDA-ARMA hybrid model</article-title>
          <source>Complex Intell Systems</source>
          <year>2021</year>
          <month>09</month>
          <day>04</day>
          <volume>7</volume>
          <issue>6</issue>
          <fpage>3165</fpage>
          <lpage>3178</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34777976"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40747-021-00514-7</pub-id>
          <pub-id pub-id-type="medline">34777976</pub-id>
          <pub-id pub-id-type="pii">514</pub-id>
          <pub-id pub-id-type="pmcid">PMC8416577</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Priyadarshini</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Mohanty</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Puri</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>PK</given-names>
            </name>
          </person-group>
          <article-title>A study on the sentiments and psychology of twitter users during COVID-19 lockdown period</article-title>
          <source>Multimed Tools Appl</source>
          <year>2022</year>
          <month>06</month>
          <day>14</day>
          <volume>81</volume>
          <issue>19</issue>
          <fpage>27009</fpage>
          <lpage>27031</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34149302"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11042-021-11004-w</pub-id>
          <pub-id pub-id-type="medline">34149302</pub-id>
          <pub-id pub-id-type="pii">11004</pub-id>
          <pub-id pub-id-type="pmcid">PMC8200552</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Gallemore</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>An augmented multilingual Twitter dataset for studying the COVID-19 infodemic</article-title>
          <source>Soc Netw Anal Min</source>
          <year>2021</year>
          <month>10</month>
          <day>20</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>102</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34697560"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13278-021-00825-0</pub-id>
          <pub-id pub-id-type="medline">34697560</pub-id>
          <pub-id pub-id-type="pii">825</pub-id>
          <pub-id pub-id-type="pmcid">PMC8528187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Analysis of spatiotemporal characteristics of big data on social media sentiment with COVID-19 epidemic topics</article-title>
          <source>Chaos Solitons Fractals</source>
          <year>2020</year>
          <month>11</month>
          <volume>140</volume>
          <fpage>110123</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32834635"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.chaos.2020.110123</pub-id>
          <pub-id pub-id-type="medline">32834635</pub-id>
          <pub-id pub-id-type="pii">S0960-0779(20)30520-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC7367019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Analysis model of college students' mental health based on online community topic mining and emotion analysis in novel coronavirus epidemic situation</article-title>
          <source>Front Public Health</source>
          <year>2022</year>
          <month>09</month>
          <day>13</day>
          <volume>10</volume>
          <fpage>1000313</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36187685"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpubh.2022.1000313</pub-id>
          <pub-id pub-id-type="medline">36187685</pub-id>
          <pub-id pub-id-type="pmcid">PMC9516716</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ogbuokiri</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmadi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bragazzi</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Movahedi Nia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Mellado</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Orbinski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Asgary</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Public sentiments toward COVID-19 vaccines in South African cities: An analysis of Twitter posts</article-title>
          <source>Front Public Health</source>
          <year>2022</year>
          <month>08</month>
          <day>12</day>
          <volume>10</volume>
          <fpage>987376</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36033735"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpubh.2022.987376</pub-id>
          <pub-id pub-id-type="medline">36033735</pub-id>
          <pub-id pub-id-type="pmcid">PMC9412204</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Topics, sentiments, and emotions triggered by COVID-19-related tweets from Iran and Turkey official news agencies</article-title>
          <source>SN Comput Sci</source>
          <year>2021</year>
          <month>07</month>
          <day>29</day>
          <volume>2</volume>
          <issue>5</issue>
          <fpage>394</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34341778"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s42979-021-00789-0</pub-id>
          <pub-id pub-id-type="medline">34341778</pub-id>
          <pub-id pub-id-type="pii">789</pub-id>
          <pub-id pub-id-type="pmcid">PMC8319903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The impact factors of social media users' forwarding behavior of COVID-19 vaccine topic: Based on empirical analysis of Chinese Weibo users</article-title>
          <source>Front Public Health</source>
          <year>2022</year>
          <volume>10</volume>
          <fpage>871722</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36187642"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpubh.2022.871722</pub-id>
          <pub-id pub-id-type="medline">36187642</pub-id>
          <pub-id pub-id-type="pmcid">PMC9515960</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Albahli</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Twitter sentiment analysis: An Arabic text mining approach based on COVID-19</article-title>
          <source>Front Public Health</source>
          <year>2022</year>
          <month>10</month>
          <day>10</day>
          <volume>10</volume>
          <fpage>966779</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36299761"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpubh.2022.966779</pub-id>
          <pub-id pub-id-type="medline">36299761</pub-id>
          <pub-id pub-id-type="pmcid">PMC9589219</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarirete</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A bibliometric analysis of COVID-19 vaccines and sentiment analysis</article-title>
          <source>Procedia Comput Sci</source>
          <year>2021</year>
          <volume>194</volume>
          <fpage>280</fpage>
          <lpage>287</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1877-0509(21)02124-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.procs.2021.10.083</pub-id>
          <pub-id pub-id-type="medline">35013686</pub-id>
          <pub-id pub-id-type="pii">S1877-0509(21)02124-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC8730542</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elsaka</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Afyouni</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Hashem</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Al Aghbari</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Spatio-temporal sentiment mining of COVID-19 Arabic social media</article-title>
          <source>ISPRS Int J Geo-Inf</source>
          <year>2022</year>
          <month>09</month>
          <day>02</day>
          <volume>11</volume>
          <issue>9</issue>
          <fpage>476</fpage>
          <pub-id pub-id-type="doi">10.3390/ijgi11090476</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Bhaumik</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nandi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>Sexually transmitted disease-related Reddit posts during the COVID-19 pandemic: latent Dirichlet allocation analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <month>10</month>
          <day>31</day>
          <volume>24</volume>
          <issue>10</issue>
          <fpage>e37258</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/10/e37258/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/37258</pub-id>
          <pub-id pub-id-type="medline">36219757</pub-id>
          <pub-id pub-id-type="pii">v24i10e37258</pub-id>
          <pub-id pub-id-type="pmcid">PMC9624277</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Box-Steffensmeier</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Moses</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Meaningful messaging: Sentiment in elite social media communication with the public on the COVID-19 pandemic</article-title>
          <source>Sci Adv</source>
          <year>2021</year>
          <month>07</month>
          <day>16</day>
          <volume>7</volume>
          <issue>29</issue>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.science.org/doi/abs/10.1126/sciadv.abg2898?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/sciadv.abg2898</pub-id>
          <pub-id pub-id-type="medline">34261655</pub-id>
          <pub-id pub-id-type="pii">7/29/eabg2898</pub-id>
          <pub-id pub-id-type="pmcid">PMC8279499</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Multi-stage Internet public opinion risk grading analysis of public health emergencies: An empirical study on Microblog in COVID-19</article-title>
          <source>Inf Process Manag</source>
          <year>2022</year>
          <month>01</month>
          <volume>59</volume>
          <issue>1</issue>
          <fpage>102796</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34744256"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2021.102796</pub-id>
          <pub-id pub-id-type="medline">34744256</pub-id>
          <pub-id pub-id-type="pii">S0306-4573(21)00274-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC8556697</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Public attitudes toward COVID-19 vaccines on English-language Twitter: A sentiment analysis</article-title>
          <source>Vaccine</source>
          <year>2021</year>
          <month>09</month>
          <day>15</day>
          <volume>39</volume>
          <issue>39</issue>
          <fpage>5499</fpage>
          <lpage>5505</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34452774"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2021.08.058</pub-id>
          <pub-id pub-id-type="medline">34452774</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(21)01106-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC8439574</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yousefinaghani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dara</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mubareka</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Papadopoulos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sharif</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>An analysis of COVID-19 vaccine sentiments and opinions on Twitter</article-title>
          <source>Int J Infect Dis</source>
          <year>2021</year>
          <month>07</month>
          <volume>108</volume>
          <fpage>256</fpage>
          <lpage>262</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1201-9712(21)00462-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijid.2021.05.059</pub-id>
          <pub-id pub-id-type="medline">34052407</pub-id>
          <pub-id pub-id-type="pii">S1201-9712(21)00462-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC8157498</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ly</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kacker</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>She</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Revealing public opinion towards COVID-19 vaccines with Twitter data in the United States: spatiotemporal perspective</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>09</month>
          <day>10</day>
          <volume>23</volume>
          <issue>9</issue>
          <fpage>e30854</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/9/e30854/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/30854</pub-id>
          <pub-id pub-id-type="medline">34346888</pub-id>
          <pub-id pub-id-type="pii">v23i9e30854</pub-id>
          <pub-id pub-id-type="pmcid">PMC8437406</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Albaradei</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Alamro</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salhi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alshehri</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jaber</surname>
              <given-names>II</given-names>
            </name>
            <name name-style="western">
              <surname>Tifratene</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gojobori</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Duarte</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Rise and fall of the global conversation and shifting sentiments during the COVID-19 pandemic</article-title>
          <source>Humanit Soc Sci Commun</source>
          <year>2021</year>
          <month>05</month>
          <day>17</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1057/s41599-021-00798-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rempel</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Carenini</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Janjua</surname>
              <given-names>NZ</given-names>
            </name>
          </person-group>
          <article-title>Tracking COVID-19 discourse on Twitter in North America: infodemiology study using topic modeling and aspect-based sentiment analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>02</month>
          <day>10</day>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>e25431</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/2/e25431/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25431</pub-id>
          <pub-id pub-id-type="medline">33497352</pub-id>
          <pub-id pub-id-type="pii">v23i2e25431</pub-id>
          <pub-id pub-id-type="pmcid">PMC7879725</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Storey</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>O'Leary</surname>
              <given-names>DE</given-names>
            </name>
          </person-group>
          <article-title>Text analysis of evolving emotions and sentiments in COVID-19 Twitter communication</article-title>
          <source>Cognit Comput</source>
          <year>2022</year>
          <month>07</month>
          <day>28</day>
          <fpage>1</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35915743"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s12559-022-10025-3</pub-id>
          <pub-id pub-id-type="medline">35915743</pub-id>
          <pub-id pub-id-type="pii">10025</pub-id>
          <pub-id pub-id-type="pmcid">PMC9330938</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhashmi</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Khedr</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Arif</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>El Bannany</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Using a hybrid-classification method to analyze Twitter data during critical events</article-title>
          <source>IEEE Access</source>
          <year>2021</year>
          <volume>9</volume>
          <fpage>141023</fpage>
          <lpage>141035</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2021.3119063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Weibo users' emotion and sentiment orientation in traditional Chinese medicine (TCM) during the COVID-19 pandemic</article-title>
          <source>Disaster Med Public Health Prep</source>
          <year>2022</year>
          <month>10</month>
          <volume>16</volume>
          <issue>5</issue>
          <fpage>1835</fpage>
          <lpage>1838</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34369351"/>
          </comment>
          <pub-id pub-id-type="doi">10.1017/dmp.2021.259</pub-id>
          <pub-id pub-id-type="medline">34369351</pub-id>
          <pub-id pub-id-type="pii">S1935789321002597</pub-id>
          <pub-id pub-id-type="pmcid">PMC8505818</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>SKW</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>DKW</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Exploring public response to COVID-19 on Weibo with LDA topic modeling and sentiment analysis</article-title>
          <source>Data Inf Manag</source>
          <year>2021</year>
          <month>01</month>
          <day>01</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>86</fpage>
          <lpage>99</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2543-9251(22)00024-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.2478/dim-2020-0023</pub-id>
          <pub-id pub-id-type="medline">35402850</pub-id>
          <pub-id pub-id-type="pii">S2543-9251(22)00024-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC8975181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Monolingual and multilingual topic analysis using LDA and BERT embeddings</article-title>
          <source>Journal of Informetrics</source>
          <year>2020</year>
          <month>08</month>
          <volume>14</volume>
          <issue>3</issue>
          <fpage>101055</fpage>
          <pub-id pub-id-type="doi">10.1016/j.joi.2020.101055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghasiya</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Okamura</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Investigating COVID-19 news across four nations: a topic modeling and sentiment analysis approach</article-title>
          <source>IEEE Access</source>
          <year>2021</year>
          <volume>9</volume>
          <fpage>36645</fpage>
          <lpage>36656</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2021.3062875</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Public attention about COVID-19 on social media: An investigation based on data mining and text analysis</article-title>
          <source>Pers Individ Dif</source>
          <year>2021</year>
          <month>06</month>
          <volume>175</volume>
          <fpage>110701</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33536695"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.paid.2021.110701</pub-id>
          <pub-id pub-id-type="medline">33536695</pub-id>
          <pub-id pub-id-type="pii">S0191-8869(21)00076-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7843112</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>NI</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>IH</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Islam</surname>
              <given-names>MN</given-names>
            </name>
          </person-group>
          <article-title>Leveraging machine learning to analyze sentiment from COVID-19 tweets: A global perspective</article-title>
          <source>Eng Rep</source>
          <year>2022</year>
          <month>09</month>
          <day>18</day>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>e12572</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36247344"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/eng2.12572</pub-id>
          <pub-id pub-id-type="medline">36247344</pub-id>
          <pub-id pub-id-type="pii">ENG212572</pub-id>
          <pub-id pub-id-type="pmcid">PMC9538004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naseem</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Razzak</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Khushi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eklund</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>COVIDSenti: a large-scale benchmark Twitter data set for COVID-19 sentiment analysis</article-title>
          <source>IEEE Trans Comput Soc Syst</source>
          <year>2021</year>
          <month>08</month>
          <volume>8</volume>
          <issue>4</issue>
          <fpage>1003</fpage>
          <lpage>1015</lpage>
          <pub-id pub-id-type="doi">10.1109/tcss.2021.3051189</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Path extension similarity link prediction method based on matrix algebra in directed networks</article-title>
          <source>Computer Communications</source>
          <year>2022</year>
          <month>04</month>
          <volume>187</volume>
          <fpage>83</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.1016/j.comcom.2022.02.002</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
