<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i1e24889</article-id>
      <article-id pub-id-type="pmid">33326408</article-id>
      <article-id pub-id-type="doi">10.2196/24889</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>A Novel Machine Learning Framework for Comparison of Viral COVID-19–Related Sina Weibo and Twitter Posts: Workflow Development and Content Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yu</surname>
            <given-names>Fei</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sun</surname>
            <given-names>Ruoyan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zowalla</surname>
            <given-names>Richard</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Shi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Public Health Sciences</institution>
            <institution>University of North Carolina at Charlotte</institution>
            <addr-line>9021 University City Blvd</addr-line>
            <addr-line>Charlotte, NC, 28223-0001</addr-line>
            <country>United States</country>
            <phone>1 8148800738</phone>
            <email>schen56@uncc.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2316-111X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Zhou</surname>
            <given-names>Lina</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1864-0527</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Yunya</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5159-1689</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Qian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2354-0208</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Ping</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1859-0710</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Kanlun</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3084-7168</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Ge</surname>
            <given-names>Yaorong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9576-0293</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Janies</surname>
            <given-names>Daniel</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff8" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7890-9906</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Public Health Sciences</institution>
        <institution>University of North Carolina at Charlotte</institution>
        <addr-line>Charlotte, NC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Data Science</institution>
        <institution>University of North Carolina at Charlotte</institution>
        <addr-line>Charlotte, NC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Business</institution>
        <institution>University of North Carolina at Charlotte</institution>
        <addr-line>Charlotte, NC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Journalism</institution>
        <institution>Hong Kong Baptist University</institution>
        <addr-line>Hong Kong</addr-line>
        <country>Hong Kong</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>School of Communications</institution>
        <institution>Elon University</institution>
        <addr-line>Elon, NC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Medical Informatics</institution>
        <institution>School of Public Health</institution>
        <institution>Jilin University</institution>
        <addr-line>Jilin</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Department of Software and Information System</institution>
        <institution>University of North Carolina at Charlotte</institution>
        <addr-line>Charlotte, NC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Department of Bioinformatics and Genomics</institution>
        <institution>University of North Carolina at Charlotte</institution>
        <addr-line>Charlotte, NC</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Shi Chen <email>schen56@uncc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>1</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>6</day>
        <month>1</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>1</issue>
      <elocation-id>e24889</elocation-id>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>10</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>3</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>24</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>3</day>
          <month>12</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Shi Chen, Lina Zhou, Yunya Song, Qian Xu, Ping Wang, Kanlun Wang, Yaorong Ge, Daniel Janies. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 06.01.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/1/e24889" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Social media plays a critical role in health communications, especially during global health emergencies such as the current COVID-19 pandemic. However, there is a lack of a universal analytical framework to extract, quantify, and compare content features in public discourse of emerging health issues on different social media platforms across a broad sociocultural spectrum.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to develop a novel and universal content feature extraction and analytical framework and contrast how content features differ with sociocultural background in discussions of the emerging COVID-19 global health crisis on major social media platforms.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We sampled the 1000 most shared viral Twitter and Sina Weibo posts regarding COVID-19, developed a comprehensive coding scheme to identify 77 potential features across six major categories (eg, clinical and epidemiological, countermeasures, politics and policy, responses), quantified feature values (0 or 1, indicating whether or not the content feature is mentioned in the post) in each viral post across social media platforms, and performed subsequent comparative analyses. Machine learning dimension reduction and clustering analysis were then applied to harness the power of social media data and provide more unbiased characterization of web-based health communications.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>There were substantially different distributions, prevalence, and associations of content features in public discourse about the COVID-19 pandemic on the two social media platforms. Weibo users were more likely to focus on the disease itself and health aspects, while Twitter users engaged more about policy, politics, and other societal issues.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We extracted a rich set of content features from social media data to accurately characterize public discourse related to COVID-19 in different sociocultural backgrounds. In addition, this universal framework can be adopted to analyze social media discussions of other emerging health issues beyond the COVID-19 pandemic.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>Twitter</kwd>
        <kwd>Sina Weibo</kwd>
        <kwd>content feature extraction</kwd>
        <kwd>cross-cultural comparison</kwd>
        <kwd>machine learning</kwd>
        <kwd>social media</kwd>
        <kwd>infodemiology</kwd>
        <kwd>infoveillance</kwd>
        <kwd>content analysis</kwd>
        <kwd>workflow</kwd>
        <kwd>communication</kwd>
        <kwd>framework</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Social media platforms are important communication channels for public engagement of various health issues [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. Through social media, the public can not only receive information from health agencies and news outlets about various health issues [<xref ref-type="bibr" rid="ref5">5</xref>] but also actively participate in web-based discussions with peers and influencers to exchange opinions about these issues [<xref ref-type="bibr" rid="ref6">6</xref>]. Social media platforms have been adopted in various health campaigns by both health agencies and concerned groups, including promotion of vaccination [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], exercise and healthy lifestyles, and smoking cessation [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>During health emergencies, especially global infectious disease pandemics, social media has been used substantially by both individuals and organizations. Social media platforms were frequently used during previous public health emergencies of international concern (PHEICs), such as the 2014 Ebola outbreak [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>] and the 2016 Zika pandemic [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Social media has also been intensively used during the current COVID-19 pandemic; <italic>COVID-19</italic> is currently the most mentioned keyword across all major social media platforms worldwide. Therefore, social media can be used for infodemiology studies [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref17">17</xref>] to better understand public concerns and make informed decisions regarding the COVID-19 pandemic as well.</p>
      <p>Health emergencies are seldom isolated health or medical issues. Pandemics, including the current COVID-19 pandemic, are almost always intermingled with complicated interactions of underlying societal and cultural factors that vary within and among countries. Consequently, discussions of these pandemics on social media include content not restricted to health, as observed during the 2014 Ebola and 2016 Zika epidemics [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. During the current COVID-19 pandemic, it has also been demonstrated that various social and political issues are associated with the pandemic, including different views on nonpharmaceutical interventions (NPIs) such as mask-wearing, social distancing, and stay-at-home orders [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p>
      <p>To extract and analyze various content features in social media posts, natural language processing (NLP) methods such as linguistic inquiry and word count (LIWC) are usually applied [<xref ref-type="bibr" rid="ref30">30</xref>]. However, although LIWC can cover a broad spectrum of topic features, it was not specifically designed for health-related topics. LIWC places more emphasis on psychological processes [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. In addition, LIWC was developed almost exclusively in the Western sociocultural context and may not work well when analyzing discussions outside Western societies. During the COVID-19 pandemic, many discussions have been taking place on social media platforms in non–English-speaking regions, such as the Sina Weibo platform in China [<xref ref-type="bibr" rid="ref33">33</xref>]. Alternative data-driven computational linguistic/NLP algorithms aim to deliver more natural insights directly from data, bypass various human assumptions, overcome lack of inclusiveness of features, and reduce potential bias [<xref ref-type="bibr" rid="ref34">34</xref>]. Examples of commonly used techniques include word embedding, such as word2vec and doc2vec [<xref ref-type="bibr" rid="ref35">35</xref>]. However, completely data-driven techniques can result in a lack of interpretability. For instance, the exact meanings of vectors resulting from the doc2vec algorithm are unclear, and it is usually used for classification purposes [<xref ref-type="bibr" rid="ref25">25</xref>]. Similar to LIWC, it is still challenging to use the Chinese language as an input into these data-driven algorithms without extensive data preprocessing, which may result in a loss of subtlety of the content of the original Chinese post.</p>
      <p>Because of these technical challenges, especially the lack of a universally designed content analysis and feature extraction workflow, few studies have compared social media discussion across different sociocultural backgrounds with regard to the COVID-19 pandemic [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]. Cross-platform and cross-culture studies are infrequent and generally observational [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>].</p>
      <p>Therefore, we suggest that there is an emergent need to develop a more interpretable and universal content analytical workflow across a wide sociocultural spectrum during the current COVID-19 pandemic and future pandemics. Developing this analytical workflow will vastly expand our fine-grained understanding and characterization of the content features of discussions on health issues worldwide. Until such a workflow is achieved, we will not be able to effectively compare and contrast health communication patterns on different social media platforms worldwide. As such, we propose the following two major objectives in this study:</p>
      <list list-type="order">
        <list-item>
          <p>Develop a content feature extraction and coding scheme to characterize discussions about the current COVID-19 pandemic on major social media platforms across sociocultural backgrounds (Twitter and Sina Weibo);</p>
        </list-item>
        <list-item>
          <p>Compare and contrast content features of the most shared viral social media posts on Twitter and Sina Weibo through a comprehensive analytical workflow with state-of-the-art machine learning techniques.</p>
        </list-item>
      </list>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Retrieval of Social Media Posts</title>
        <p>We acquired social media posts on both Sina Weibo (colloquially referred to as Weibo hereafter) and Twitter from January 6 to April 15, 2020, for a total of 100 days. We used the same sampling period for the two social media platforms for two reasons: (1) it made the sampling process consistent and directly comparable, and (2) this study focused more on sociocultural backgrounds than on specific geospatial locations. Weibo is almost exclusively used by Chinese users, while Twitter users cover a much wider range of geospatial regions. Given the very different sizes and patterns of the epidemic in different countries, we suggest that having a consistent sampling period could reduce confounding factors such as actual outbreak size and its influence on public perception of COVID-19.</p>
        <p>The Weibo posts were acquired via the application programming interface (API) of Hong Kong Baptist University in Python. We downloaded all Weibo posts during the sampling period without further sampling. There were around 4 million Weibo posts acquired and archived.</p>
        <p>The tweets were acquired directly from Twitter via a contract between the School of Data Science, the University of North Carolina at Charlotte, and Twitter. The tweets were not retrieved by the commonly used Twitter API or other commercial APIs. The tweets were a 1% sample; however, given the adequately large sample size (more than 10 million tweets), we believe that this sample is a good representation of public discourse regarding the ongoing pandemic on Twitter.</p>
        <p>The keywords used to retrieve social media posts were <italic>COVID19</italic>, <italic>nCOV19</italic>, <italic>SARSCoV2</italic>, their variants (<italic>novel pneumonia</italic>, <italic>SARS</italic>, <italic>SARS2</italic>, <italic>COVID</italic>, <italic>coronavirus</italic>), and other related medical/health terms (<italic>GGO</italic>, <italic>PHEIC</italic>, <italic>pandemic</italic>). Inappropriate, derogatory, and discriminatory terms such as <italic>WuhanVirus</italic>, <italic>WuhanPneumonia</italic>, and <italic>ChinaVirus</italic> were also included to increase the sample size for research purposes. Both original posts and reposts were retrieved if they included the search terms.</p>
      </sec>
      <sec>
        <title>Identification of Viral Posts</title>
        <p>“Viral” posts were defined as those with large numbers of shares (also known as “reposts,” “retweets,” etc) on different social media platforms. There are other ways to define viral posts, such as number of comments or number of likes. However, comments may not align with the content and intention of the original posts, while liking would not necessarily propagate the original post on social media. Sharing through reposting or retweeting indicates that the user acknowledged the value of the original post and actively participated in its dissemination on social media. Therefore, the number of shares was used to define viral posts.</p>
        <p>Nevertheless, the three potential definitions of “viral” post were highly correlated (Pearson correlation coefficient ρ&#62;0.8 for each pair of viral post definitions). For example, it was very common for a highly shared COVID-19 post to receive many likes and comments as well. Therefore, we suggest that focusing on one definition of “viral” post can provide sufficient insights for the other two definitions.</p>
        <p>To avoid oversampling during certain days when a cluster of viral posts occurred (ie, numerous posts occurred on the same day), we identified and selected the 5 most shared posts on Weibo and the 5 most shared posts on Twitter for each day of the sampling period. Practically, we ordered the posts by original posting date first and then ranked them based on the number of shares they received on each day. Due to the fast pace of social media, most viral posts received a majority of their reposts within a short period of time, and the overall lifespan of a viral post usually lasted less than 48 hours [<xref ref-type="bibr" rid="ref25">25</xref>]. Eventually, a total of 1000 viral COVID-19 social media posts were selected (5 posts per day over 100 days on each platform): 500 for Weibo and 500 for Twitter. Given the relatively large number of posts and the size of the content feature set (discussed next), we believe the sample is adequate to provide accurate, granular characterization of viral social media posts regarding COVID-19.</p>
      </sec>
      <sec>
        <title>Extraction, Annotation, and Quantification of Content Features</title>
        <p>In this study, we developed a relatively novel and comprehensive content analysis workflow to characterize and quantify various content features of health-related social media posts. The creation of content features went through two rounds of iteration. In the first round, we used an open-coding approach to identify an initial set of features by manually analyzing a set of 200 randomly selected social media posts. Then, we randomly selected another set of 800 posts, combined them with the 200 posts, split them into 5 subsets (200 each), and asked five student coders to analyze them independently. The student coders were provided with the list of initial features and were all bilingual, with fluency in both Chinese and English. Moreover, the coders were asked to create new features if they were missing from the existing list. Finally, we refined the list of features based on our review, comparison, and evaluation of the coding results. A few new content features were discussed and added in this round. In the second round, we leveraged the refined features in screening, evaluating, characterizing, and validating a test set of 50 randomly selected posts by our research team. Note that posts in this set were not necessarily viral posts. As discussed later in this paper, randomly selecting posts increases the coverage of various topic contents in the posts. We performed several iterations of intercoder reliability analysis, discussions, and refinements to ensure clarity and consistency in the definition and coding scheme of the features. The intercoder reliability (kappa value) threshold was set as 0.8 before deploying more comprehensive coding. The coding scheme can be described concisely as follows.</p>
        <p>Each feature was 0-1 binary coded (ie, a post either had or did not have the specific content feature). This coding scheme is more objective and easier to interpret than LIWC because it only considers the presence of a specific content feature. In addition, because of the objectivity of the coding scheme, there is no need to translate the social media posts, which avoids losing the subtlety of the original post during the translation process.</p>
        <p>The final version included a total of 77 content features that were grouped into 6 major categories, each major category with more specific features. The six major categories included clinical and epidemiological features (eg, mentioning any symptoms or signs, transmission, or diagnosis and testing); countermeasures and COVID-19–related resources (eg, mentioning face masks, other medical supplies, or disinfection); policies and politics (eg, mentioning social distancing or stay-at-home orders); public reactions and societal impact (eg, preparedness, remote working, or college education); spatial scales (eg, local, state/provincial, national, or international); and social issues (eg, discrimination against certain countries, violence, or uncivil language). Note that these content features were not mutually exclusive, and a post could have multiple features under the same or different major categories at the same time as long as the post contained the specific contents. For example, a single post could mention symptoms, diagnosis, risk factors, and clinical consequences. In addition, these content features were universally developed and objective; therefore, they could be applied in different sociocultural backgrounds without the need for translation, which is required in LIWC. The complete descriptions of these major categories and further specific contents within each major category are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>After the comprehensive coding scheme was established and the list of 77 content features was defined, we then coded the 1000 posts according to the coding scheme. For each post, the output was a 77-element 0-1 binary vector. A 1 indicated that the post mentioned the corresponding content feature, while a 0 indicated that the specific post did not mention that content feature. In general, the more 1’s (and hence, the fewer 0’s), the more diverse the topics contained in the post. Fewer 1’s indicated more focused topics in the post. The final output for the analytical workflow was a 1000 × 77 binary matrix that could be further divided into two 500 × 77 binary matrices representing viral Twitter and Sina Weibo groups, respectively.</p>
      </sec>
      <sec>
        <title>Descriptive Analysis of Viral COVID-19 Posts Across Social Media Platforms</title>
        <p>We applied descriptive analysis to quantify and contrast the prevalence of content features in the most viral COVID-19 posts across the social media platforms Weibo and Twitter. The prevalence of a content feature was defined as the proportion of sampled posts coded as 1 for that feature. Prevalence was bounded between 0 (ie, none of the sampled posts mentioned the content feature) and 1 (ie, all posts mentioned the content feature). A larger prevalence indicated that the corresponding content feature was more frequently mentioned in the viral social media posts regarding COVID-19.</p>
        <p>We further applied a two-sample <italic>z</italic> test to investigate whether there were statistically significant differences in the two prevalence measures of the same content feature between Weibo and Twitter. Because the data were 0-1 binary instead of continuous, the <italic>z</italic> test was more appropriate than the <italic>t</italic> test or Kolmogorov-Smirnov test. The content features that had the most distinct prevalence measures between the two social media platforms were identified based on the <italic>z</italic> test.</p>
        <p>In addition to comparing different social media posts, we also studied the associations between content features on different social media platforms. Pairwise Pearson correlation was calculated between each pair of content features in both Twitter and Sina Weibo posts. Pairs with statistically significant associations (<italic>P</italic>&#60;.05) were identified. These analyses provide a comprehensive characterization on how viral COVID-19 content features are distributed and correlated differently on the two major social media platforms.</p>
      </sec>
      <sec>
        <title>Unsupervised Learning of Viral COVID-19 Posts Across Social Media Platforms</title>
        <p>To further investigate the distributions and relationships among multiple content features simultaneously, we applied the <italic>t</italic>-distributed stochastic neighbor embedding (<italic>t</italic>-SNE) technique. <italic>t</italic>-SNE is a machine learning dimension reduction algorithm. In contrast to the more commonly used principal component analysis technique, <italic>t</italic>-SNE can handle data that are not normally distributed, as presented in this study (ie, binary data) and is also commonly used in other studies involving large and heterogeneous data (eg, bioinformatics data [<xref ref-type="bibr" rid="ref41">41</xref>]). Performing <italic>t</italic>-SNE provides a clear visualization of associations among content features in 2D space instead of the original complex 77-dimensional feature space.</p>
        <p><italic>t</italic>-SNE dimension reduction paved the way for subsequent clustering analysis. In this study, we applied unsupervised machine learning <italic>k</italic>-means clustering [<xref ref-type="bibr" rid="ref42">42</xref>]. Note that we created 6 major categories of content features for our own manual content coding effort. These 6 categories were based on our observation and discussion about the COVID-19 pandemic and public discourse on social media. Data-driven clustering analysis (also known as unsupervised learning), on the other hand, enables the data to “speak for themselves” (hence, “unsupervised”). Data-driven clustering provides a new angle for identifying possible aggregations of content features. For example, frequently co-occurring content features may not necessarily be clustered under the same major manually created categories. <italic>k</italic>-means clustering does not require a priori information from researchers on how the features should be grouped; therefore, it reduces potential bias. The optimal <italic>k</italic> value to perform <italic>k</italic>-means clustering was determined by computing and inspecting the total within-cluster sum of squares (TWSS) over a wide range of <italic>k</italic> values from 1 to 20. Although larger <italic>k</italic> values are usually associated with smaller TWSSs, they increase the difficulty of interpreting the clusters. We examined and contrasted the clustering patterns of content features in the most viral COVID-19 posts on Twitter and Weibo.</p>
        <p>The complete workflow of extracting and analyzing viral COVID-19 posts on different social media platforms is conceptualized and presented in <xref rid="figure1" ref-type="fig">Figure 1</xref>. All analytical codes were developed in R 4.0.2 (R Project) with supporting packages of <italic>Rtsne</italic>, <italic>tidyverse</italic>, <italic>cluster</italic>, <italic>factoextra</italic>, <italic>gridExtra</italic>, <italic>wordcloud</italic>, <italic>tm</italic>, <italic>corrplot</italic>, and <italic>ggplot2</italic>. The codes and data are freely available upon request.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Conceptualized social media content feature extraction and analysis workflow. Sampling was performed with specific application programming interfaces in Python. Feature development, extraction, and quantification were performed manually by our group. Subsequent analyses were performed in R.</p>
          </caption>
          <graphic xlink:href="jmir_v23i1e24889_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Description of Viral COVID-19–Related Social Media Posts on Sina Weibo and Twitter</title>
        <p>The most prevalent content features in Twitter (which has mostly Western users) were <italic>health agency</italic> (eg, CDC [US Centers for Disease Control and Prevention], NIH [National Institutes of Health], and WHO [World Health Organization]; 37.0%), <italic>violence</italic> (mostly related to domestic violence due to stay-at-home orders; 20.4%), <italic>international relationships</italic> (14.8%), <italic>misinformation</italic> (eg, mentioning <italic>misinformation</italic>, <italic>disinformation</italic>, <italic>hoax</italic>, <italic>fake news</italic>; 11.2%), <italic>stay-at-home order</italic> (11.0%), and <italic>vaccine</italic> (10.8%). The 10 most frequently mentioned content features on Twitter, along with their prevalence and ranking, are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref> (top panel). In general, prevalent COVID-19 content features on Twitter did not directly focus on the disease itself and the epidemic but rather on policies, politics, and other secondary societal issues, such as violence and discrimination. This finding reinforced the notion that COVID-19, like many large pandemics and emerging health issues, is not an isolated medical issue and is intertwined with complicated sociopolitical aspects. In particular, 2020 was a US presidential election year. Therefore, it was not surprising that US President Donald Trump and other former and current US office holders (eg, President Barack Obama, Vice President Joe Biden, Majority Leader Mitch McConnell, and House Speaker Nancy Pelosi) were frequently mentioned in COVID-19–related viral tweets. Given the partisan nature of the US political system, the <italic>Republican Party</italic> and <italic>Democratic Party</italic> were also consistently mentioned with COVID-19, mostly with the distinct views and countermeasures of these parties related to the pandemic. 
The most mentioned nonpolitician <italic>celebrity</italic> was Bill Gates, and mentions of his name were usually associated with content features of <italic>vaccines</italic> and <italic>misinformation</italic> (mostly vaccine-related conspiracy theories). <italic>Discrimination</italic> toward Chinese people, Asian Americans, and Asian people in general was also frequently mentioned. Note that these were content features and may not reflect actual discrimination and negative sentiments against these groups in the tweets. In fact, many viral tweets that mentioned <italic>discrimination</italic> features were advocating for the elimination of discrimination and xenophobia.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Top 10 content features and their prevalence on Twitter (top) and Weibo (bottom).</p>
          </caption>
          <graphic xlink:href="jmir_v23i1e24889_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In comparison, the most prevalent content features in Weibo were <italic>research</italic> (18%), <italic>transmission</italic> (17.8%), <italic>cases</italic> (17%), <italic>healthcare personnel</italic> (15.8%), and <italic>testing</italic> (12.8%). The top 10 most mentioned content features on Weibo, along with their prevalence and ranking, are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref> (bottom panel). Compared to Twitter users, Weibo users (who are mostly Chinese) were more likely to engage in discussion of disease-related content features. Among the 10 most common content features, only <italic>celebrity</italic> was not directly related to the disease itself. In other words, Chinese Weibo users tended to focus on COVID-19 as a health and medical issue rather than on the associated societal and political issues discussed in Western societies. Viral Weibo posts were much more likely to mention <italic>health personnel</italic> and pay tribute to health care workers. <italic>Research</italic> on the SARS-CoV-2 pathogen and its <italic>transmission</italic> among human populations were also frequently mentioned, demonstrating the public interest in the state-of-the-art understanding of the emerging health crisis. Because China experienced the original 2003 severe acute respiratory syndrome (SARS) outbreak, which was caused by severe acute respiratory syndrome coronavirus 1 (SARS-CoV-1), and COVID-19 was caused by a similar coronavirus (SARS-CoV-2), the <italic>history</italic> of the 2003 SARS outbreak was a recurrent theme in COVID-19 Weibo posts. The <italic>celebrities</italic> mentioned in posts related to COVID-19 on Weibo were also very different from those on Twitter. 
In general, viral Weibo posts mentioned pop culture idols (eg, singers, other performing artists, and sports stars), and the sentiment was almost always positive (eg, mentions of financial, resource, and emotional support for COVID-19–impacted regions and people provided by these celebrities).</p>
        <p>These results showed vastly different content features covered in viral posts between Weibo and Twitter, which reflected the vast differences in perception of COVID-19 in the corresponding two major sociocultural systems. In general, Twitter users (who mostly live in Western countries) were highly engaged in discussions of countermeasures, politics, and policies related to the COVID-19 pandemic. In comparison, Weibo users (mostly Chinese) tended to focus more on the disease itself, but not exclusively. Among the top 10 features, the only overlapping content feature between the two platforms was the <italic>local</italic> situation. Therefore, these findings reveal substantially different focuses on the COVID-19 pandemic in Chinese and Western societies, which were reflected in the most viral social media posts in cyberspace.</p>
      </sec>
      <sec>
        <title>Comparative Analysis of Content Features of Twitter and Sina Weibo</title>
        <p>We further provided a quantitative comparison of content features between the two social media platforms. Out of a total of 77 content features, 3 (4%) were absent from all of the 500 most viral tweets (<italic>comorbidity</italic>, <italic>eHealth</italic>, and <italic>suicide</italic>), and 6 (8%) were not present in any of the 500 most viral Weibo posts (<italic>constitution</italic>, <italic>curfew</italic>, <italic>remote working</italic>, <italic>major religion</italic>, <italic>discrimination against gender</italic>, and <italic>discrimination against religion</italic>). This result also implies that viral discussions of COVID-19 on Weibo had narrower but more focused content features. There was no intersection of missing features between the two major social media platforms.</p>
        <p>Two-sample <italic>z</italic> tests were used to further quantify between-platform differences for each content feature. Content features with zero prevalence (ie, never mentioned in viral social media posts on either platform) were removed to perform the <italic>z</italic> test correctly. Features having the most distinct prevalence between the two platforms were <italic>health agency</italic> (difference of prevalence [<italic>D</italic>]=0.25; Twitter minus Weibo; <italic>P</italic>&#60;.001), <italic>vaccine</italic> (<italic>D</italic>=–0.17, <italic>P</italic>&#60;.001), <italic>shelter-in-place</italic> (or lockdown, <italic>D</italic>=–0.11, <italic>P</italic>&#60;.001), <italic>cases</italic> (<italic>D</italic>=0.09, <italic>P</italic>&#60;.001), and <italic>stay-at-home order</italic> (<italic>D</italic>=0.10, <italic>P</italic>=.002). While many of these content features were among the top 20 mentioned on both social media platforms (<xref rid="figure2" ref-type="fig">Figure 2</xref>), we also observed that <italic>local</italic> situations, the only common top 10 feature in both platforms, actually had statistically significant differences (<italic>D</italic>=–0.11, <italic>P</italic>&#60;.001). <italic>Local</italic> was the 6th most mentioned content feature on Weibo and the 10th on Twitter. These quantitative findings can be explained by the different sociocultural backgrounds of the users of Twitter (Western) and Weibo (Chinese).</p>
        <p>Some features were also distributed similarly between the two social media platforms (ie, <italic>P</italic> values substantially greater than .05 based on the <italic>z</italic> test). Of them, <italic>preparedness</italic> (<italic>D</italic>&#60;0.01, <italic>P</italic>=.90), <italic>discrimination against ethnicity</italic> (<italic>D</italic>&#60;0.01, <italic>P</italic>=.96), <italic>prevention</italic> (<italic>D</italic>&#60;0.01, <italic>P</italic>=.97), <italic>recovery</italic> (<italic>D</italic>&#60;0.01, <italic>P</italic>=.97), <italic>ecosystem</italic> (<italic>D</italic>&#60;0.01, <italic>P</italic>=.97), <italic>masks</italic> (<italic>D</italic>&#60;0.01, <italic>P</italic>&#62;.99), and <italic>Trump</italic> (<italic>D</italic>&#60;0.01, <italic>P</italic>&#62;.99) were the least distinct features. These features represent the common ground regarding COVID-19 between the two social media platforms and the two underlying sociocultural systems.</p>
        <p>The missing content features revealed a discrepancy between viral and nonviral discussions of COVID-19 on social media. As mentioned earlier, the comprehensive content feature coding scheme was originally developed from a random sample of posts, most of which were nonviral posts with &#60;5 reposts. We speculated that certain controversial content features (especially those related to policy and politics on Twitter) facilitated the spread of certain posts on social media and caused them to go viral. Posts that are less controversial typically do not gain much attention and do not go viral on social media. However, we must point out that content features are only one reason that a post can go viral. Other aspects include temporality (ie, when the post was published relative to the epidemic), property of the original posting user (eg, number of followers), and the severity of the pandemic at that time and place.</p>
        <p>Significant Pearson correlations (<italic>P</italic>&#60;.05) are shown in <xref rid="figure3" ref-type="fig">Figure 3</xref> for Twitter (left) and Weibo (right) posts, respectively. In general, significantly correlated content feature pairs were more abundant on Weibo than on Twitter. One possible explanation is that Twitter has a 280-character length limit for posts. Therefore, content features in each tweet were limited, and concurrent content features in the same tweet were less frequent. On the other hand, Sina Weibo allows up to 2000 characters; therefore, it is possible to include much more content in a Weibo post than in a tweet. Consequently, a Weibo post can accommodate more content features than a tweet. Viral COVID-19 tweets included an average of 2.37 content features, and viral Weibo posts contained 2.78 content features. However, most viral Weibo posts used URLs to pack in more information and keep the post concise rather than including everything in the main post content. Therefore, the 2000 character limit is only a theoretical upper limit and was rarely reached, especially for viral Weibo posts.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Significant Pearson correlations (<italic>P</italic>&#60;.05) among content features on Twitter (left) and Weibo (right). The complete list of the 77 content features on the x- and y-axes can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </caption>
          <graphic xlink:href="jmir_v23i1e24889_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Note that Weibo is subject to censorship toward certain content features. For example, although US President <italic>Trump</italic> was mentioned quite a few times in viral Weibo posts, President Xi of China is not an allowed topic on Weibo and Chinese cyberspace in general. Therefore, there was no equivalent content feature to <italic>Trump</italic> on Weibo. Other <italic>political figures</italic> in China, such as the governor of Hubei (Yong Ying), are generally permitted by censors to be mentioned and commented on in Weibo posts.</p>
      </sec>
      <sec>
        <title>Dimension Reduction and Clustering Analysis of Content Features</title>
        <p>The machine learning dimension reduction <italic>t</italic>-SNE results for Twitter and Weibo are shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. These figures show how content features are distributed and associated in the reduced 2D space instead of the original 77-dimensional feature space. It is very clear that the content features have distinct distribution patterns between the two social media platforms in the reduced 2D space. This reinforces our previous findings on the variability of content features across the sociocultural spectrum.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p><italic>t</italic>-SNE results for viral COVID-19 tweets (T) and Weibo posts (W).</p>
          </caption>
          <graphic xlink:href="jmir_v23i1e24889_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The number of optimal clusters on Twitter (<italic>k</italic><sub>t</sub>) was determined as 6 from <xref rid="figure5" ref-type="fig">Figure 5</xref> (left), while the number of optimal clusters (<italic>k</italic><sub>s</sub>) on Weibo was found to be 5 from <xref rid="figure5" ref-type="fig">Figure 5</xref> (right). Therefore, not only were content features regarding COVID-19 distributed differently between the two social media platforms, but their associations (eg, clusters) within posts were also distinct between the two platforms. Note that these clusters were identified by the data-driven unsupervised machine learning technique, and these clusters did not necessarily align with the 6 manually developed major categories.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Numbers of clusters and within-group sums of squares for tweets (left) and Weibo posts (right).</p>
          </caption>
          <graphic xlink:href="jmir_v23i1e24889_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>We further show the <italic>k</italic>-means clustering results of the content features on Twitter and Weibo in <xref rid="figure6" ref-type="fig">Figure 6</xref> (left and right, respectively). The clustering patterns were substantially different between the two social media platforms. The sizes of the 6 distinct clusters on Twitter were 154, 107, 96, 62, 42, and 39. The total sum of squares (TSS) across all 6 clusters was 1402. The total within-cluster sum of squares (TWSS) was 1079, and the total between-cluster sum of squares (TBSS) was 323 on Twitter. Note that TSS = TWSS + TBSS. In comparison, the 5 cluster sizes of Weibo posts were 218, 106, 81, 67, and 28. The TSS, TWSS, and TBSS on Weibo were 1262, 1034, and 228, respectively. Therefore, all sums of squares were much smaller on Weibo than on Twitter. In addition, the two dimensions (the x- and y-axes in <xref rid="figure6" ref-type="fig">Figure 6</xref>) were also much smaller on Twitter (3.2% and 3%) than on Weibo (4.5% and 4%). All these results reveal that COVID-19 content features in viral Weibo posts were more similar across different posts than those in Twitter posts. Twitter showed a more diverse array of content features among different tweets.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p><italic>k</italic>-means clustering on viral tweets (left) and Weibo posts (right).</p>
          </caption>
          <graphic xlink:href="jmir_v23i1e24889_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Theoretical Innovation</title>
        <p>This study is the first of its kind to comprehensively characterize the content features of discussions regarding a large pandemic on social media across the sociocultural spectrum. We showed the vast differences in topic content features of viral social media posts in Twitter and Weibo, the two most influential social media platforms in China and the West during the COVID-19 pandemic. In general, viral social media posts in China focused on cases and prevention, which are topics that are more related to COVID-19 as a health issue. However, as a comparison, most viral tweets regarding COVID-19 focused more on policies and politics, including <italic>stay-at-home</italic> orders, President <italic>Trump</italic>, and other <italic>political figures</italic>. Through various analytical methods, social media data provided a new angle to explore and understand public discourse of the COVID-19 pandemic and associated social, political, and economic issues. Details of these discussions in virtual cyberspace may provide insights on the actual disease epidemic in the real world. For example, analyzing public perception of various NPIs, such as <italic>social distancing</italic>, mandatory <italic>mask-wearing</italic>, and <italic>stay-at-home</italic> orders, would provide an estimation of the compliance with these NPIs, which determine the case counts and epidemic trajectory in a region. This concept echoes the original idea of infodemiology, which uses a time series of social media post counts related to a health issue (eg, COVID-19) as an indicator of actual case counts [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. In addition to the number of social media posts, we will be able to further extract fine-grained perceptions of the risk and NPIs of COVID-19 and extend the application of infodemiology.</p>
      </sec>
      <sec>
        <title>Technical Advances</title>
        <p>To achieve an effective comparison across the sociocultural spectrum regarding the COVID-19 pandemic on social media, we developed a comprehensive content analytical workflow. This analytical workflow was specifically designed for transboundary infectious diseases (eg, outbreaks and pandemics of infectious diseases) that have complicated sociocultural contexts. Compared to the commonly used LIWC [<xref ref-type="bibr" rid="ref31">31</xref>], our workflow, especially the coding scheme, has several advantages. First, LIWC is a general content analytical tool that ignores many important content features during the COVID-19 pandemic. Our coding scheme is tailored to the complicated and interacting health, social, cultural, and political nuances of transboundary infectious diseases. Therefore, our coding scheme is able to capture a much more comprehensive and detailed set of content features in web-based discussions regarding transboundary infectious diseases. Second, LIWC uses proprietary algorithms to calculate individual scores of different features, and the exact interpretation of the numeric values is not readily comprehensible. In contrast, our coding scheme is 0-1 binary, where 1 indicates that the content has a feature and 0 indicates that it does not. This coding is clearer than the obscure LIWC scores. In addition, LIWC scores vary substantially (from 0 to 100) among different features. Certain features that have large values in LIWC tend to dominate and overshadow other features; thus, further analysis is prone to bias. Our coding scheme is consistent, as all features have the same coding scheme. Finally, LIWC is difficult to directly apply to non–Indo-European languages; therefore, direct comparison between sociocultural contexts with LIWC is almost impossible. In contrast, our coding scheme is context-free and can be applied to virtually any language and any region. The coding scheme itself is also flexible. 
Researchers can easily add and modify content features as necessary when working with other health issues beyond COVID-19. The coding scheme can be retrofitted to understand communications on previous events (eg, the 2016 Zika event). We can easily add, remove, or revise corresponding content features related to the specific health issues we are exploring.</p>
      </sec>
      <sec>
        <title>Limitations of the Current Study and Future Directions</title>
        <p>This study adopts a static view of all viral social media posts for comparative analysis between two sides of the sociocultural spectrum in a given period of time. However, for a large and ongoing pandemic, time is another major influential factor that is associated with the actual progress of the pandemic. Our previous studies showed that the Zika case series was strongly associated with the Zika discussion trend on Twitter in 2016 [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Similarly, future studies can be expanded to explicitly characterize how various content features evolve with time in different regions. The ongoing COVID-19 pandemic case series can be predicted by certain content features (eg, regarding NPIs), similar to the previously discussed infodemiology approach.</p>
        <p>We used the number of reposts (ie, retweets or shares) as the definition of a viral social media post. One limitation is that we did not consider the possibility of automatic reposting by bots or cyborgs. Therefore, it is possible that the large number of reposts may not accurately represent and reflect the public perception of an issue. Bots and cyborgs, however, are not necessarily associated with misinformation. Bots and cyborgs can be used as tools to quickly disseminate information on social media platforms for other reasons, such as advertising. A future direction of this study is to identify other definitions of viral posts (eg, posts with a large number of likes, favorites, or comments).</p>
        <p>Viral social media posts are only one of many attributes of social media discussion. Our initial assessment showed that &#62;75% of tweets and &#62;80% of Weibo posts regarding COVID-19 did not receive any attention on social media. This number is similar to our previous finding that 76% of all Zika-related tweets were never retweeted [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
        <p>To characterize web-based public discourse related to COVID-19 and other emerging health issues accurately and comprehensively, we will continue studying these nonviral social media posts on different platforms. However, given the ever-increasing volume of social media posts, effective sampling strategies are a priority. Effective sampling is a necessity to provide a less biased depiction of content features. Data mining of nonviral posts regarding COVID-19, especially on sentiment toward NPIs, will provide a more accurate estimation of compliance with NPIs in different regions at different stages of the pandemic. We will also be able to further compare and contrast how the distributions of content features differ between viral and nonviral post groups as well as across the sociocultural spectrum.</p>
        <p>In this study, we depict how NPIs of COVID-19 have been mentioned on social media across the sociocultural spectrum. Because this study focuses on providing a neutral and objective characterization of content features in COVID-19–related discussions, it does not consider subjective sentiment toward specific NPIs. However, individual and societal perception toward NPIs can be strong influencing factors during the COVID-19 pandemic. For instance, positive sentiment toward <italic>mask-wearing</italic> and <italic>social distancing</italic> may reflect actual compliance with these NPIs in society and hence help reduce the risk of transmission. On the other hand, negative sentiment toward these NPIs may lead to noncompliance and facilitate COVID-19 transmission in the real world. In a future study, we will further integrate objective content features and corresponding sentiment and/or emotion to provide a more comprehensive understanding of public perceptions.</p>
        <p>Finally, this study relies on human coding of content features, which is substantially labor-intensive. For instance, adequate and proper training is required to achieve high intercoder reliability before each coder can perform independently. In comparison, the LIWC algorithm is automated and relatively easy to use. We are still at the early development stage of a novel analytical workflow that is similar to LIWC. We expect to develop at least a semiautomated and semisupervised machine learning method for quick and effective web-based health information processing and annotation. To achieve this ambitious goal, we envision a crowd-sourcing approach that will enable ardent citizen scientists and volunteers worldwide to help further manually code more social media posts, create an even larger corpus, and develop state-of-the-art semisupervised or supervised machine learning pipelines to automate the process. The eventual product will be able to automatically extract content features from social media posts regarding health issues and can further guide effective health communications during emergencies.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Coding book of COVID-19 content features.</p>
        <media xlink:href="jmir_v23i1e24889_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 155 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CDC</term>
          <def>
            <p>US Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">
            <italic>D</italic>
          </term>
          <def>
            <p>difference of prevalence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LIWC</term>
          <def>
            <p>linguistic inquiry and word count</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">NIH</term>
          <def>
            <p>National Institutes of Health</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NPI</term>
          <def>
            <p>nonpharmaceutical interventions</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PHEIC</term>
          <def>
            <p>public health emergency of international concern</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SARS</term>
          <def>
            <p>severe acute respiratory syndrome</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SARS-CoV-1</term>
          <def>
            <p>severe acute respiratory syndrome coronavirus 1</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10"><italic>t</italic>-SNE</term>
          <def>
            <p><italic>t</italic> distribution stochastic neighbor embedding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">TBSS</term>
          <def>
            <p>total between-cluster sum of squares</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">TSS</term>
          <def>
            <p>total sum of squares</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">TWSS</term>
          <def>
            <p>total within-cluster sum of squares</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>SC is supported by the Models of Infectious Disease Agent Study (MIDAS) COVID-19 urgent supplementary grant (MIDASUP2020-5). YS is supported by the Interdisciplinary Research Clusters Matching Scheme (IRCMS/19-20/D04) and the AI and Media Research Lab at Hong Kong Baptist University (SDF17-1013-P01). The authors are grateful for the help from Ms Mengyu Li and Mr Minghao Wang from Hong Kong Baptist University and Mr Evan Lai from St. Mark’s School of Texas for their participation in and contributions to this project. The authors are also grateful for the technical assistance from the School of Data Science, UNC Charlotte, in providing the social media data.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Capurro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cole</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Echavarría</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Joe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Neogi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>The use of social networking sites for public health practice and research: a systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>03</month>
          <day>14</day>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>e79</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/3/e79/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.2679</pub-id>
          <pub-id pub-id-type="medline">24642014</pub-id>
          <pub-id pub-id-type="pii">v16i3e79</pub-id>
          <pub-id pub-id-type="pmcid">PMC3971364</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moorhead</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Hazlett</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Irwin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hoving</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A new dimension of health care: systematic review of the uses, benefits, and limitations of social media for health communication</article-title>
          <source>J Med Internet Res</source>
          <year>2013</year>
          <month>04</month>
          <day>23</day>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>e85</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2013/4/e85/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1933</pub-id>
          <pub-id pub-id-type="medline">23615206</pub-id>
          <pub-id pub-id-type="pii">v15i4e85</pub-id>
          <pub-id pub-id-type="pmcid">PMC3636326</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eckert</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sopory</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wilkins</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Padgett</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Novak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Noyes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Alexander</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderford</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gamhewage</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Health-Related Disaster Communication and Social Media: Mixed-Method Systematic Review</article-title>
          <source>Health Commun</source>
          <year>2018</year>
          <month>12</month>
          <volume>33</volume>
          <issue>12</issue>
          <fpage>1389</fpage>
          <lpage>1400</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1080/10410236.2017.1351278"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/10410236.2017.1351278</pub-id>
          <pub-id pub-id-type="medline">28825501</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grajales</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sheps</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Novak-Lauscher</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Social media: a review and tutorial of applications in medicine and health care</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>02</month>
          <day>11</day>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>e13</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/2/e13/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.2912</pub-id>
          <pub-id pub-id-type="medline">24518354</pub-id>
          <pub-id pub-id-type="pii">v16i2e13</pub-id>
          <pub-id pub-id-type="pmcid">PMC3936280</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fielding</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>WWT</given-names>
            </name>
          </person-group>
          <article-title>Public Engagement and Government Responsiveness in the Communications About COVID-19 During the Early Epidemic Stage in China: Infodemiology Study on Social Media Data</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>05</month>
          <day>26</day>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>e18796</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/5/e18796/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18796</pub-id>
          <pub-id pub-id-type="medline">32412414</pub-id>
          <pub-id pub-id-type="pii">v22i5e18796</pub-id>
          <pub-id pub-id-type="pmcid">PMC7284407</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Palen</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>The evolving role of the public information officer: An examination of social media in emergency management</article-title>
          <source>J Homel Secur Emerg Manag</source>
          <year>2012</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>22</fpage>
          <pub-id pub-id-type="doi">10.1515/1547-7355.1976</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wiyeh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jaca</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mavundza</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ndwandwe</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wiysonge</surname>
              <given-names>CS</given-names>
            </name>
          </person-group>
          <article-title>Social media and HPV vaccination: Unsolicited public comments on a Facebook post by the Western Cape Department of Health provide insights into determinants of vaccine hesitancy in South Africa</article-title>
          <source>Vaccine</source>
          <year>2019</year>
          <month>10</month>
          <day>08</day>
          <volume>37</volume>
          <issue>43</issue>
          <fpage>6317</fpage>
          <lpage>6323</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.vaccine.2019.09.019"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2019.09.019</pub-id>
          <pub-id pub-id-type="medline">31521412</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(19)31219-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>AlKulaib</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Benton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Weaponized Health Communication: Twitter Bots and Russian Trolls Amplify the Vaccine Debate</article-title>
          <source>Am J Public Health</source>
          <year>2018</year>
          <month>10</month>
          <volume>108</volume>
          <issue>10</issue>
          <fpage>1378</fpage>
          <lpage>1384</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2018.304567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naslund</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Aschbrenner</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>McCulloch</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brunette</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Dallery</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bartels</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marsch</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>Systematic review of social media interventions for smoking cessation</article-title>
          <source>Addict Behav</source>
          <year>2017</year>
          <month>10</month>
          <volume>73</volume>
          <fpage>81</fpage>
          <lpage>93</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28499259"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.addbeh.2017.05.002</pub-id>
          <pub-id pub-id-type="medline">28499259</pub-id>
          <pub-id pub-id-type="pii">S0306-4603(17)30172-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5556947</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beall</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Hofer</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Schaller</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Infections and Elections: Did an Ebola Outbreak Influence the 2014 U.S. Federal Elections (and if so, How)?</article-title>
          <source>Psychol Sci</source>
          <year>2016</year>
          <month>05</month>
          <day>14</day>
          <volume>27</volume>
          <issue>5</issue>
          <fpage>595</fpage>
          <lpage>605</lpage>
          <pub-id pub-id-type="doi">10.1177/0956797616628861</pub-id>
          <pub-id pub-id-type="medline">26976083</pub-id>
          <pub-id pub-id-type="pii">0956797616628861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A gendered human rights analysis of Ebola and Zika: locating gender in global health emergencies</article-title>
          <source>Int Aff</source>
          <year>2016</year>
          <month>08</month>
          <day>31</day>
          <volume>92</volume>
          <issue>5</issue>
          <fpage>1041</fpage>
          <lpage>1060</lpage>
          <pub-id pub-id-type="doi">10.1111/1468-2346.12704</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adebayo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Neumark</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gesser-Edelsburg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Abu Ahmad</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Levine</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Zika pandemic online trends, incidence and health risk communication: a time trend study</article-title>
          <source>BMJ Glob Health</source>
          <year>2017</year>
          <month>08</month>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>e000296</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29082006"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjgh-2017-000296</pub-id>
          <pub-id pub-id-type="medline">29082006</pub-id>
          <pub-id pub-id-type="pii">bmjgh-2017-000296</pub-id>
          <pub-id pub-id-type="pmcid">PMC5656128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Avery</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Public information officers’ social media monitoring during the Zika virus crisis, a global health threat surrounded by public uncertainty</article-title>
          <source>Public Relat Rev</source>
          <year>2017</year>
          <month>09</month>
          <volume>43</volume>
          <issue>3</issue>
          <fpage>468</fpage>
          <lpage>476</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pubrev.2017.02.018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology: the epidemiology of (mis)information</article-title>
          <source>Am J Med</source>
          <year>2002</year>
          <month>12</month>
          <volume>113</volume>
          <issue>9</issue>
          <fpage>763</fpage>
          <lpage>765</lpage>
          <pub-id pub-id-type="doi">10.1016/s0002-9343(02)01473-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: framework for an emerging set of public health informatics methods to analyze search, communication and publication behavior on the Internet</article-title>
          <source>J Med Internet Res</source>
          <year>2009</year>
          <month>03</month>
          <day>27</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2009/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1157</pub-id>
          <pub-id pub-id-type="medline">19329408</pub-id>
          <pub-id pub-id-type="pii">v11i1e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC2762766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance tracking online health information and cyberbehavior for public health</article-title>
          <source>Am J Prev Med</source>
          <year>2011</year>
          <month>05</month>
          <volume>40</volume>
          <issue>5 Suppl 2</issue>
          <fpage>S154</fpage>
          <lpage>S158</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.amepre.2011.02.006"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2011.02.006</pub-id>
          <pub-id pub-id-type="medline">21521589</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(11)00088-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chew</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pandemics in the age of Twitter: content analysis of Tweets during the 2009 H1N1 outbreak</article-title>
          <source>PLoS One</source>
          <year>2010</year>
          <month>11</month>
          <day>29</day>
          <volume>5</volume>
          <issue>11</issue>
          <fpage>e14118</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0014118"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0014118</pub-id>
          <pub-id pub-id-type="medline">21124761</pub-id>
          <pub-id pub-id-type="pmcid">PMC2993925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brenner</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Inbar</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Disgust sensitivity predicts political ideology and policy attitudes in the Netherlands</article-title>
          <source>Eur J Soc Psychol</source>
          <year>2014</year>
          <month>11</month>
          <day>15</day>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>27</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1002/ejsp.2072</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Faulkner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schaller</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Duncan</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>Evolved Disease-Avoidance Mechanisms and Contemporary Xenophobic Attitudes</article-title>
          <source>Group Process Intergroup Relat</source>
          <year>2004</year>
          <month>09</month>
          <day>20</day>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>333</fpage>
          <lpage>353</lpage>
          <pub-id pub-id-type="doi">10.1177/1368430204046142</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernández-Luque</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bau</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Health and social media: perfect storm of information</article-title>
          <source>Healthc Inform Res</source>
          <year>2015</year>
          <month>04</month>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>67</fpage>
          <lpage>73</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.e-hir.org/DOIx.php?id=10.4258/hir.2015.21.2.67"/>
          </comment>
          <pub-id pub-id-type="doi">10.4258/hir.2015.21.2.67</pub-id>
          <pub-id pub-id-type="medline">25995958</pub-id>
          <pub-id pub-id-type="pmcid">PMC4434065</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gui</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pine</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Conspiracy Talk on Social Media</article-title>
          <source>Proc ACM Hum-Comput Interact</source>
          <year>2017</year>
          <month>12</month>
          <day>06</day>
          <volume>1</volume>
          <issue>CSCW</issue>
          <fpage>1</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1145/3134696</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oyeyemi</surname>
              <given-names>SO</given-names>
            </name>
            <name name-style="western">
              <surname>Gabarron</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wynn</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Ebola, Twitter, and misinformation: a dangerous combination?</article-title>
          <source>BMJ</source>
          <year>2014</year>
          <month>10</month>
          <day>14</day>
          <volume>349</volume>
          <fpage>g6178</fpage>
          <pub-id pub-id-type="doi">10.1136/bmj.g6178</pub-id>
          <pub-id pub-id-type="medline">25315514</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shadgan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pakravan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zaeimkohan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shahpar</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Khodaee</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Zika and Rio Olympic Games</article-title>
          <source>Curr Sports Med Rep</source>
          <year>2016</year>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>298</fpage>
          <lpage>300</lpage>
          <pub-id pub-id-type="doi">10.1249/jsr.0000000000000278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Buchenberger</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bagavathi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fair</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shaikh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Krishnan</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Dynamics of Health Agency Response and Public Engagement in Public Health Emergency: A Case Study of CDC Tweeting Patterns During the 2016 Zika Epidemic</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2018</year>
          <month>11</month>
          <day>22</day>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e10827</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2018/4/e10827/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10827</pub-id>
          <pub-id pub-id-type="medline">30467106</pub-id>
          <pub-id pub-id-type="pii">v4i4e10827</pub-id>
          <pub-id pub-id-type="pmcid">PMC6284147</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Safarnejad</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bagavathi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krishnan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Identifying Influential Factors in the Discussion Dynamics of Emerging Health Issues on Social Media: Computational Study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>07</month>
          <day>28</day>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>e17175</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/3/e17175/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17175</pub-id>
          <pub-id pub-id-type="medline">32348275</pub-id>
          <pub-id pub-id-type="pii">v6i3e17175</pub-id>
          <pub-id pub-id-type="pmcid">PMC7420635</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abd-Alrazaq</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alhuwail</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Househ</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hamdi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Top Concerns of Tweeters During the COVID-19 Pandemic: Infoveillance Study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>04</month>
          <day>21</day>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>e19016</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/4/e19016/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19016</pub-id>
          <pub-id pub-id-type="medline">32287039</pub-id>
          <pub-id pub-id-type="pii">v22i4e19016</pub-id>
          <pub-id pub-id-type="pmcid">PMC7175788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ataguba</surname>
              <given-names>OA</given-names>
            </name>
            <name name-style="western">
              <surname>Ataguba</surname>
              <given-names>JE</given-names>
            </name>
          </person-group>
          <article-title>Social determinants of health: the role of effective communication in the COVID-19 pandemic in developing countries</article-title>
          <source>Glob Health Action</source>
          <year>2020</year>
          <month>12</month>
          <day>31</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>1788263</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32657669"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/16549716.2020.1788263</pub-id>
          <pub-id pub-id-type="medline">32657669</pub-id>
          <pub-id pub-id-type="pmcid">PMC7480618</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lwin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sheldenkar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Global Sentiments Surrounding the COVID-19 Pandemic on Twitter: Analysis of Twitter Trends</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>05</month>
          <day>22</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19447</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e19447/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19447</pub-id>
          <pub-id pub-id-type="medline">32412418</pub-id>
          <pub-id pub-id-type="pii">v6i2e19447</pub-id>
          <pub-id pub-id-type="pmcid">PMC7247466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Puri</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Coomes</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Haghbayan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gunaratne</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Social media and vaccine hesitancy: new updates for the era of COVID-19 and globalized infectious diseases</article-title>
          <source>Hum Vaccin Immunother</source>
          <year>2020</year>
          <month>11</month>
          <day>01</day>
          <volume>16</volume>
          <issue>11</issue>
          <fpage>2586</fpage>
          <lpage>2593</lpage>
          <pub-id pub-id-type="doi">10.1080/21645515.2020.1780846</pub-id>
          <pub-id pub-id-type="medline">32693678</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennebaker</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Booth</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Francis</surname>
              <given-names>ME</given-names>
            </name>
          </person-group>
          <article-title>Linguistic Inquiry and Word Count: LIWC2015</article-title>
          <source>Pennebaker Conglomerates</source>
          <year>2015</year>
          <access-date>2020-12-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://s3-us-west-2.amazonaws.com/downloads.liwc.net/LIWC2015_OperatorManual.pdf">https://s3-us-west-2.amazonaws.com/downloads.liwc.net/LIWC2015_OperatorManual.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jiao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the Validity of Simplified Chinese Version of LIWC in Detecting Psychological Expressions in Short Texts on Social Network Services</article-title>
          <source>PLoS One</source>
          <year>2016</year>
          <volume>11</volume>
          <issue>6</issue>
          <fpage>e0157947</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0157947"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0157947</pub-id>
          <pub-id pub-id-type="medline">27322382</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-52776</pub-id>
          <pub-id pub-id-type="pmcid">PMC4920595</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guntuku</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pelullo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Volpp</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Studying expressions of loneliness in individuals using twitter: an observational study</article-title>
          <source>BMJ Open</source>
          <year>2019</year>
          <month>11</month>
          <day>04</day>
          <volume>9</volume>
          <issue>11</issue>
          <fpage>e030355</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=31685502"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2019-030355</pub-id>
          <pub-id pub-id-type="medline">31685502</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2019-030355</pub-id>
          <pub-id pub-id-type="pmcid">PMC6830671</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Cuomo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Purushothaman</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Data Mining and Content Analysis of the Chinese Social Media Platform Weibo During the Early COVID-19 Outbreak: Retrospective Observational Infoveillance Study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>04</month>
          <day>21</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e18700</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e18700/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18700</pub-id>
          <pub-id pub-id-type="medline">32293582</pub-id>
          <pub-id pub-id-type="pii">v6i2e18700</pub-id>
          <pub-id pub-id-type="pmcid">PMC7175787</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobs</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Marques</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Oates</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Kamen</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Obeid</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Word2Vec inversion and traditional text classifiers for phenotyping lupus</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>08</month>
          <day>22</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>126</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0518-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0518-1</pub-id>
          <pub-id pub-id-type="medline">28830409</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0518-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5568290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
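            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>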
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>Proceedings of the 26th International Conference on Neural Information Processing Systems - Volume 2</source>
          <year>2013</year>
          <month>12</month>
          <conf-name>26th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 5-10, 2013</conf-date>
          <conf-loc>Lake Tahoe, NV</conf-loc>
          <fpage>3111</fpage>
          <lpage>3119</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/2999792.2999959"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dkhar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Quansar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Saleem</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>SS</given-names>
            </name>
          </person-group>
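          <article-title>Knowledge, attitude, and practices related to COVID-19 pandemic among social media users in J&amp;K, India</article-title>
          <source>Indian J Public Health</source>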
          <year>2020</year>
          <volume>64</volume>
          <issue>6</issue>
          <fpage>205</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.4103/ijph.IJPH_469_20"/>
          </comment>
          <pub-id pub-id-type="doi">10.4103/ijph.ijph_469_20</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hussain</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Majeed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Imran</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ullah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hashmi</surname>
              <given-names>FK</given-names>
            </name>
            <name name-style="western">
              <surname>Saeed</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhry</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Rasool</surname>
              <given-names>MF</given-names>
            </name>
          </person-group>
          <article-title>Knowledge, Attitude, and Practices Toward COVID-19 in Primary Healthcare Providers: A Cross-Sectional Study from Three Tertiary Care Hospitals of Peshawar, Pakistan</article-title>
          <source>J Community Health</source>
          <year>2020</year>
          <month>07</month>
          <day>06</day>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32632645"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10900-020-00879-9</pub-id>
          <pub-id pub-id-type="medline">32632645</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10900-020-00879-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC7338131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Akinwunmi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>CJP</given-names>
            </name>
            <name name-style="western">
              <surname>Ming</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Health Communication Through News Media During the Early Stage of the COVID-19 Outbreak in China: Digital Topic Modeling Approach</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>04</month>
          <day>28</day>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>e19118</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/4/e19118/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19118</pub-id>
          <pub-id pub-id-type="medline">32302966</pub-id>
          <pub-id pub-id-type="pii">v22i4e19118</pub-id>
          <pub-id pub-id-type="pmcid">PMC7189789</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Cross-Country Comparison of Public Awareness, Rumors, and Behavioral Responses to the COVID-19 Epidemic: Infodemiology Study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>08</month>
          <day>03</day>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>e21143</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/8/e21143/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21143</pub-id>
          <pub-id pub-id-type="medline">32701460</pub-id>
          <pub-id pub-id-type="pii">v22i8e21143</pub-id>
          <pub-id pub-id-type="pmcid">PMC7402643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>McGrath</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Nature and Diffusion of COVID-19-related Oral Health Information on Chinese Social Media: Analysis of Tweets on Weibo</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>15</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e19981</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e19981/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19981</pub-id>
          <pub-id pub-id-type="medline">32501808</pub-id>
          <pub-id pub-id-type="pii">v22i6e19981</pub-id>
          <pub-id pub-id-type="pmcid">PMC7296975</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Cerise</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Application of t-SNE to human genetic data</article-title>
          <source>J Bioinform Comput Biol</source>
          <year>2017</year>
          <month>08</month>
          <day>22</day>
          <volume>15</volume>
          <issue>04</issue>
          <fpage>1750017</fpage>
          <pub-id pub-id-type="doi">10.1142/s0219720017500172</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demidenko</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>The next-generation K-means algorithm</article-title>
          <source>Stat Anal Data Min</source>
          <year>2018</year>
          <month>08</month>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>153</fpage>
          <lpage>166</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30073045"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/sam.11379</pub-id>
          <pub-id pub-id-type="medline">30073045</pub-id>
          <pub-id pub-id-type="pii">SAM11379</pub-id>
          <pub-id pub-id-type="pmcid">PMC6062903</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
