<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v27i1e63190</article-id>
      <article-id pub-id-type="pmid">39977859</article-id>
      <article-id pub-id-type="doi">10.2196/63190</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Leveraging Large Language Models for Infectious Disease Surveillance—Using a Web Service for Monitoring COVID-19 Patterns From Self-Reporting Tweets: Content Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Astha</surname>
            <given-names>Varuna</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Elbattah</surname>
            <given-names>Mahmoud</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Feng</surname>
            <given-names>Xiaoyue</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Xie</surname>
            <given-names>Jiacheng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3733-4349</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Ziyang</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-5714-3495</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>Shuai</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7632-427X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Hilliard</surname>
            <given-names>Joel</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-6926-0373</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>An</surname>
            <given-names>Guanghui</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-2669-3963</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Tang</surname>
            <given-names>Xiaoting</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6635-1044</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>Lei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6365-1965</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Yang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-1475-7487</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Wan</surname>
            <given-names>Xiufeng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2629-9234</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>Dong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Electrical Engineering and Computer Science</institution>
            <institution>University of Missouri</institution>
            <addr-line>227 Naka Hall</addr-line>
            <addr-line>Columbia, MO, 65211</addr-line>
            <country>United States</country>
            <phone>1 5738822299</phone>
            <email>xudong@missouri.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4809-0514</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Electrical Engineering and Computer Science</institution>
        <institution>University of Missouri</institution>
        <addr-line>Columbia, MO</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Christopher S. Bond Life Sciences Center</institution>
        <institution>University of Missouri</institution>
        <addr-line>Columbia, MO</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Acupuncture-Moxibustion and Tuina</institution>
        <institution>Shanghai University of Traditional Chinese Medicine</institution>
        <addr-line>Shanghai</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Shanghai Pudong New Area Wanggang Community Health Service Center</institution>
        <addr-line>Shanghai</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>NextGen Center for Influenza and Emerging Infectious Diseases</institution>
        <institution>University of Missouri</institution>
        <addr-line>Columbia</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Molecular Microbiology and Immunology</institution>
        <institution>University of Missouri</institution>
        <addr-line>Columbia</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Dong Xu <email>xudong@missouri.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>2</month>
        <year>2025</year>
      </pub-date>
      <volume>27</volume>
      <elocation-id>e63190</elocation-id>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>6</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>10</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>9</day>
          <month>12</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>16</day>
          <month>1</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Jiacheng Xie, Ziyang Zhang, Shuai Zeng, Joel Hilliard, Guanghui An, Xiaoting Tang, Lei Jiang, Yang Yu, Xiufeng Wan, Dong Xu. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 20.02.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2025/1/e63190" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The emergence of new SARS-CoV-2 variants, the resulting reinfections, and post–COVID-19 condition continue to impact many people’s lives. Tracking websites like the one at Johns Hopkins University no longer report the daily confirmed cases, posing challenges to accurately determine the true extent of infections. Many COVID-19 cases with mild symptoms are self-assessed at home and reported on social media, which provides an opportunity to monitor and understand the progression and evolving trends of the disease.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aim to build a publicly available database of COVID-19–related tweets and extracted information about symptoms and recovery cycles from self-reported tweets. We have presented the results of our analysis of infection, reinfection, recovery, and long-term effects of COVID-19 on a visualization website that refreshes data on a weekly basis.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used Twitter (subsequently rebranded as X) to collect COVID-19–related data, from which 9 native English-speaking annotators annotated a training dataset of COVID-19–positive self-reporters. We then used large language models to identify positive self-reporters from other unannotated tweets. We used the Hilbert transform to calculate the lead of the prediction curve ahead of the reported curve. Finally, we presented our findings on symptoms, recovery, reinfections, and long-term effects of COVID-19 on the Covlab website.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We collected 7.3 million tweets related to COVID-19 between January 1, 2020, and April 1, 2024, including 262,278 self-reported cases. The predicted number of infection cases by our model is 7.63 days ahead of the official report. In addition to common symptoms, we identified some symptoms that were not included in the list from the US Centers for Disease Control and Prevention, such as lethargy and hallucinations. Repeat infections were commonly occurring, with rates of second and third infections at 7.49% (19,644/262,278) and 1.37% (3593/262,278), respectively, whereas 0.45% (1180/262,278) also reported that they had been infected &#62;5 times. We identified 723 individuals who shared detailed recovery experiences through tweets, indicating a substantial reduction in recovery time over the years. Specifically, the average recovery period decreased from around 30 days in 2020 to approximately 12 days in 2023. In addition, geographic information collected from confirmed individuals indicates that the temporal patterns of confirmed cases in states such as California and Texas closely mirror the overall trajectory observed across the United States.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Despite some biases and limitations, self-reported tweet data serves as a valuable complement to clinical data, especially in the postpandemic era dominated by mild cases. Our web-based analytic platform can play a significant role in continuously tracking COVID-19, finding new uncommon symptoms, detecting and monitoring the manifestation of long-term effects, and providing necessary insights to the public and decision-makers.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>self-reporting data</kwd>
        <kwd>large language model</kwd>
        <kwd>Twitter</kwd>
        <kwd>social media analysis</kwd>
        <kwd>natural language processing</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>COVID-19 is one of the most severe infectious diseases in human history. Although the World Health Organization downgraded the COVID-19 pandemic, declaring it is no longer a global emergency on May 5, 2023 [<xref ref-type="bibr" rid="ref1">1</xref>], the threat of infection and death remains. As of August 1, 2023, there have been &#62;300,000 confirmed cases weekly worldwide, resulting in &#62;1000 deaths per week; however, major information-publishing platforms, such as that at Johns Hopkins University, stopped collecting and tracking COVID-19 data worldwide on March 10, 2023 [<xref ref-type="bibr" rid="ref2">2</xref>]. Therefore, it has become more challenging to identify the actual number and trends of COVID-19 infections daily. Traditional public health monitoring methods face several challenges, including delays in clinical data collection, lack of real-time insights, and the underrepresentation of population-level trends, particularly in regions with limited health care reporting infrastructure [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. New tools are required to provide timely awareness and detection of COVID-19 transmission trends, reinfection patterns, and the long-term health impact of the disease.</p>
        <p>To supplement the shortage of clinical data and gain further insights into the development trends and variant tendencies of COVID-19, researchers have turned to social media, specifically Twitter (subsequently rebranded as X). Social media data offer unique advantages, such as rapid updates and a broad geographical reach, which traditional clinical data often lack [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Several studies [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref11">11</xref>] have explored the use of social media for health monitoring, including sentiment analysis of COVID-19–related tweets, the identification of emerging symptoms, and the study of vaccine hesitancy. Early studies [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref16">16</xref>] primarily focused on constructing COVID-19–related tweet databases. However, these works do not provide in-depth analysis of self-reported tweets, and the databases tended to be collected over a short time frame, typically several months, and cannot automatically update. Later, some research endeavors [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>] shifted their focus toward studying COVID-19 symptoms on the basis of tweets and reported the distribution of symptoms in tweets. However, these studies included limited numbers of collected self-reported cases. Some studies [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref24">24</xref>] used tweets to study geographic distribution but did not provide a corresponding time-series analysis or predict the spread of COVID-19. Moreover, existing studies have not fully leveraged user-generated content on social media to provide a comprehensive, dynamic view of COVID-19 trends across symptoms, geography, and time. 
This gap underscores the need for a methodology capable of providing near–real-time insights and broader geographical coverage [<xref ref-type="bibr" rid="ref25">25</xref>]. As for the visualization tools for COVID-19, some researchers [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref29">29</xref>] have developed platforms or dashboards to study the trends of COVID-19, but most were based on clinical data. A few tweet-based platforms [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>] showed limited information and failed to provide timely updates.</p>
        <p>Reinfection often refers to the phenomenon in which an individual who has recovered from COVID-19 is again infected with the virus [<xref ref-type="bibr" rid="ref31">31</xref>]. Some researchers [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>] considered that reinfection is identified when an individual tests positive again through polymerase chain reaction (PCR) testing after a minimum of 90 days of a negative result. However, some studies also suggest this duration should be 30 days [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. Other studies [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>] considered reinfection to be a new positive PCR following 2 consecutive negative PCR tests taken after primary infection. Moreover, the periodicity of reinfection, the reinfection rate, and the maximum number of infections are uncertain. Existing studies on reinfection trends and periodicity have primarily relied on clinical data, leaving gaps in understanding population-level reinfection dynamics that may be observable through self-reported data on social media [<xref ref-type="bibr" rid="ref38">38</xref>]. In addition to reinfection, there is a growing concern about the long-term effects of COVID-19, known as post–COVID-19 condition, where patients report symptoms lasting months after recovery [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. Clinical data on long-term effects remain limited [<xref ref-type="bibr" rid="ref41">41</xref>], but social media offers a platform where individuals can share ongoing symptoms, providing valuable insights for public health research [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. 
This further highlights the potential of using self-reported data to investigate the periodicity of reinfection, as well as long-term effects and symptoms, complementing traditional clinical datasets.</p>
        <p>Health natural language processing (NLP) is gaining increasing attention for its essential role in both methodology development and applications [<xref ref-type="bibr" rid="ref44">44</xref>]. The technology has been widely applied in areas such as information extraction from electronic health records [<xref ref-type="bibr" rid="ref45">45</xref>], adverse drug reaction analysis [<xref ref-type="bibr" rid="ref46">46</xref>], clinical decision support [<xref ref-type="bibr" rid="ref47">47</xref>], and hospitalization prediction [<xref ref-type="bibr" rid="ref48">48</xref>]. Using techniques such as topic modeling, sentiment analysis, and deep learning models such as Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref49">49</xref>], NLP can extract valuable medical insights from unstructured text. Combined with real-time analytics frameworks [<xref ref-type="bibr" rid="ref50">50</xref>] and knowledge graphs [<xref ref-type="bibr" rid="ref51">51</xref>], these technologies can dynamically monitor COVID-19 trends, identify previously unnoticed symptoms and long-term effects, and provide scientific support for optimizing public health strategies and resource allocation.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>In this study, we address these gaps by leveraging self-reported COVID-19 data from Twitter to provide near–real-time insights into trends across symptoms, reinfection, recovery cycles, and geographical distributions. The proposed approach not only fills the limitations of traditional monitoring systems but also offers a scalable, timely, and comprehensive method to track the pandemic’s evolving dynamics. Our visualization tool updates weekly and comprehensively analyzes information related to COVID-19 symptoms, case distribution, reinfections, recoveries, and long-term effects based on large language models (LLMs). <xref rid="figure1" ref-type="fig">Figure 1</xref> depicts our research objectives: (1) we aim to build a publicly available database of &#62;9,836,206 COVID-19–related tweets, and this database is set to automatically update with newly collected data weekly; (2) LLMs will be built to automatically filter the tweets of self-reporters and extract their mentioned symptoms and recovery cycles; and (3) we aim to build a visualization website that refreshes data on a weekly basis, Covlab [<xref ref-type="bibr" rid="ref52">52</xref>], to track and analyze infection, reinfection, recovery, and long-term effects of COVID-19.</p>
        <p>As depicted in <xref rid="figure1" ref-type="fig">Figure 1</xref>, the workflow of Covlab comprised the following steps: data collection, model training, tracking, and visualization website. During data collection, COVID-19–related tweets were collected using the Twitter application programming interface (API) and filtered from a broader dataset of social media posts. A subset of these tweets was manually labeled through a specialized annotation tool, creating a high-quality training set essential for developing machine learning models. In addition, historical tweets and user ID databases were incorporated to construct a comprehensive cohort of individuals who self-reported COVID-19 infections, forming the foundational dataset for subsequent tracking and analytical tasks. In the model training step, annotated datasets were used to train a variety of machine learning and NLP models, including support vector machine (SVM), logistic regression (LR), BERT, GPT-2, and LLM meta AI 2 (Llama-2), to identify the most effective model for detecting self-reported COVID-19 infection tweets. After selecting the optimal model, it was applied to a larger collection of COVID-19–related tweets to extract tweets reporting personal COVID-19 infections. Throughout this process, performance metrics, such as cross-validation and receiver operating characteristic (ROC) curve analysis, were used to ensure the models provided robust and accurate predictions. In the tracking step, the system conducted long-term tracking of individuals who self-reported COVID-19 infections in their tweets, focusing on analyzing symptoms such as breathing difficulties, fever, headache, cough, and fatigue, as well as recovery cycles, long-term effects (ie, sequelae), and geographical trends. In addition, reinfection cases were monitored to provide insights into the temporal dynamics of COVID-19 experiences, helping to uncover patterns in symptom progression, recovery, and recurring infections over time. 
In the visualization website step, the results of the analyses were presented on an interactive platform, Covlab, which provided a variety of visualization tools, including word clouds, pie charts, box plots, line graphs, geographic maps, and trend tables. This platform enabled users to track epidemic trends, uncover new or previously unreported symptoms, analyze recovery durations, report reinfection statistics, and explore different types of long-term effects (ie, sequelae), offering valuable insights to researchers and public health officials.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Workflow for Covlab. BERT: Bidirectional Encoder Representations from Transformers; Llama-2: large language model meta AI 2; LR: logistic regression; NLP: natural language processing; ROC: receiver operating characteristic; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e63190_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection and Preprocessing</title>
        <p>We collected and processed tweet data from January 2020 to April 2024 based on COVID-19–related keywords and hashtags through the Twitter streaming API. The searching keywords included “I.* tested[ed] positive for [covid &#124; coronavirus &#124; covid19 &#124; covid-19],” “My.* [covid &#124; coronavirus &#124; covid19 &#124; covid-19].* symptoms,” “#COVID,” and “#LongCovid” (all the keywords can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The following preprocessing operations were conducted on the tweets collected based on keywords. We first converted all words in the tweets to lowercase, standardized the tweets to American Standard Code for Information Interchange encoding using the Unicode library, and tokenized the tweets. Next, we removed all Unicode symbols and punctuation marks, some uninformative characters about usernames, such as @username, all digits and line breaks in the tweets, all URLs, such as http://, and all words contained in our stop word library (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). We converted emoji expressions into their corresponding textual expressions. To provide sufficient datasets for subsequent model training, we constructed a self-reported COVID-19 tweets dataset with manual labeling by 9 native English-speaking annotators after obtaining ethics approval. We also established a set of calibration criteria as shown in <xref ref-type="table" rid="table1">Table 1</xref> to ensure the consistency and reliability of the tweet annotations. Two additional annotators conducted a secondary annotation on the labeled data. If their new annotations were inconsistent with the originals, all annotators would decide through a voting process whether the tweet should be classified as a self-reported COVID-19 positive tweet. 
The Fleiss κ index [<xref ref-type="bibr" rid="ref53">53</xref>] was used to measure the consistency among multiple annotators to ensure the reliability of the annotations (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). We also developed a web-based annotation tool (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>) to improve the efficiency of manual annotation. We annotated 115,214 tweets, of which 13,701 (11.89%) were positive samples that described self-reported positive COVID-19 cases and 101,513 (88.11%) were negative samples either not describing self-infection with COVID-19 or irrelevant. <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> presents the types of tweets targeted in our study. The tweet depicted on the left serves as a prototypical instance, delineating self-reported information about COVID-19 infections. This includes the date of diagnosis and a detailed account of the symptoms experienced by the individual. Conversely, although the tweet shown on the right also references a diagnosis of COVID-19 and details associated symptoms, it diverges from our criteria for target tweets because it recounts a diagnosis pertaining to a third party, specifically a friend of the tweeter, rather than a self-reported account. Therefore, it falls outside the scope of our target dataset.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Labeling criteria for the tweets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="190"/>
            <col width="730"/>
            <thead>
              <tr valign="top">
                <td>Index</td>
                <td>Annotation guideline</td>
                <td>Description</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Self-reported infection</td>
                <td>Tweets must be a personal account by the author regarding their experience of contracting COVID-19. If the tweet mentions someone other than the author being infected, such as friends, family, or others, it should be labeled as a negative sample.</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>First-person narrative</td>
                <td>Tweets should use first-person pronouns (eg, “I,” “me,” and “my”) to describe the author’s experience of contracting COVID-19, not that of others.</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Concrete information</td>
                <td>Tweets should provide concrete details, including infection timeline, test results and medical treatments, among others, rather than general discussions or speculations.</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Symptom description</td>
                <td>Tweets should contain the patient’s personal descriptions of COVID-19 symptoms experienced, such as fever, cough, or difficulty breathing.</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Confirmation information</td>
                <td>Tweets should mention how the author was confirmed to have contracted COVID-19, such as the type of test conducted (PCR<sup>a</sup> and rapid antigen test), confirmation by a physician, or official institution validation.</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Treatment experience</td>
                <td>Tweets should describe the author’s experience of treatment or recovery after self-contracting COVID-19, including isolation, medication, deterioration, or improvement in their health.</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Infection timeline</td>
                <td>Tweets should contain exact time points or time ranges of the infection rather than general discussions. Providing precise timing helps verify the author’s infection period.</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Test results</td>
                <td>Tweets should reference the author’s COVID-19 test results, such as testing positive or negative or other relevant test outcomes.</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Medical facility</td>
                <td>Tweets should mention whether the author received treatment or underwent COVID-19-related tests at a medical facility, such as a hospital or clinic.</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Social distancing measures</td>
                <td>Tweets should discuss the author’s adoption of social distancing measures due to their infection, such as self-isolation or notifying close contacts.</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>Substantial evidence</td>
                <td>Tweets should contain substantial evidence, such as medical records, official notices, or other documents validating the author’s COVID-19 infection.</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>Exclusion of transmission</td>
                <td>Tweets emphasizing that the author did not transmit the virus to others may indicate a self-reported infection.</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>Consensus annotation</td>
                <td>If 3 or more annotators provide inconsistent annotations for a particular tweet, a discussion among all annotators should be initiated to reach a final consensus.</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>PCR: polymerase chain reaction.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>We have prioritized privacy and ethical considerations in our use of self-reported COVID-19–related tweets from Twitter, which, while publicly available, are processed with strict anonymization measures to ensure that no personally identifiable information can be traced back to individuals. The ethical nuances of using such data are addressed by considering users’ implied consent when posting self-reports on a public platform. To safeguard data security throughout collection, annotation, and analysis, we have implemented encryption, controlled access, and other technical measures, ensuring that the data are strictly used for research purposes and never for commercial activities. Furthermore, recognizing the inherent noise and biases in social media data, we explicitly acknowledge the limitations of our dataset and analysis. To mitigate potential risks of misinterpretation or misuse, we emphasize that our findings are intended for population-level trend analysis rather than individual diagnosis, thereby maintaining the integrity and appropriate application of the results. This study is exempt from ethical approval because it only involves the analysis of aggregate data, ensuring that no individual privacy information is disclosed. All data have been fully deidentified, and appropriate anonymization measures have been applied to guarantee that no subject can be traced.</p>
      </sec>
      <sec>
        <title>Self-Reported COVID-19–Positive Model</title>
        <p>To select text-classification models to determine whether a tweet is a self-reported COVID-19–positive tweet, we trained both traditional machine learning methods and fine-tuned LLMs. We divided the manually labeled dataset such that 80% (92,171/115,214) was used as the training set, 10% (11,521/115,214) as the validation set, and the remaining 10% (11,521/115,214) as the test set. We used word frequency, term frequency–inverse document frequency [<xref ref-type="bibr" rid="ref54">54</xref>] vectors, and feature hashing vectors as methods for text feature extraction, and we adopted 10-fold cross-validation to ensure the reliability of the results.</p>
        <p>For conventional machine learning models, we used both linear and nonlinear models to compare different classification approaches. Because the amount of the manually annotated data was relatively small and the text data features were relatively simple, we experimented with machine learning methods such as naive Bayes [<xref ref-type="bibr" rid="ref55">55</xref>], SVMs [<xref ref-type="bibr" rid="ref56">56</xref>], and LR [<xref ref-type="bibr" rid="ref57">57</xref>]. In the SVM method, we used the radial basis function as the kernel function and set the penalty parameter to 1.2. We used an L2 regularization as the penalty term in LR, with a regularization strength parameter (C) set to 1.0. As for LLMs, we used BERT [<xref ref-type="bibr" rid="ref49">49</xref>], robustly optimized BERT pretraining approach (RoBERTa) [<xref ref-type="bibr" rid="ref58">58</xref>], extreme language model [<xref ref-type="bibr" rid="ref59">59</xref>], GPT-2 [<xref ref-type="bibr" rid="ref60">60</xref>], Bigscience Large Open-science Open-access Multilingual language model (BLOOM) [<xref ref-type="bibr" rid="ref61">61</xref>], and Llama-2 [<xref ref-type="bibr" rid="ref62">62</xref>] for training. These LLMs are pretrained on large datasets using the masked language model objective for BERT and RoBERTa; the permuted language model objective for extreme language model; and the causal language modeling objective for GPT-2, BLOOM, and Llama-2. Because GPT-2, BLOOM, and Llama-2 are generative models, they may generate nonuniform results, rendering result interpretation difficult. These pretrained LLMs were originally designed to process general text of varying lengths. Through fine-tuning these LLMs on tweets, which are inherently short texts, we adapted them to better handle short text-classification tasks. We selected models pretrained on English corpora to ensure appropriate language understanding. 
We borrowed the idea from previous work [<xref ref-type="bibr" rid="ref63">63</xref>], leveraging latent representations from LLMs for supervised label prediction. Hence, we designed different LLM-based classifiers integrated with various LLMs and fully connected neural networks. Precisely, each LLM serves as a backbone for encoding the tweets instead of generating text. A fully connected neural network was integrated on top of each LLM for stably accurate detection of self-reported COVID-19 cases. Unlike traditional machine learning methods requiring feature preprocessing, the LLM-based classifiers take only the text of a tweet as input. The specific parameters of the machine learning methods and LLMs can be found in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendices 6</xref> and <xref ref-type="supplementary-material" rid="app7">7</xref>.</p>
        <p>To prevent catastrophic forgetting and ensure that LLMs have a broad understanding of self-reported COVID-19 cases during the training stage, we used low-rank adaptation (LoRA) [<xref ref-type="bibr" rid="ref64">64</xref>] to fine-tune the LLM-based classifiers. LoRA enables the parameters of a model to learn effectively by introducing trainable rank decomposition matrices into the transformer architectures in the LLMs. To achieve this reparameterization, we modified the projection matrices of query, key, value, and feedforward network modules within the self-attention mechanism of the transformer.</p>
        <p>The LLM-based classifiers were trained end-to-end using the AdamW [<xref ref-type="bibr" rid="ref65">65</xref>] optimizer and a cross-entropy objective function. During the training stage, the parameters introduced by LoRA within the pretrained model were updated with gradients, and all remaining parameters were frozen. Early stopping based on the accuracy on the validation dataset was implemented during training. All runs were trained on the Nvidia A100 graphics processing unit with a batch size of 5 for Llama-2 and 32 for other models.</p>
        <p>We evaluated the performance of each model and chose the one that achieved the best results for predicting the daily number of self-reported confirmed cases. Subsequently, we applied named entity recognition [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref67">67</xref>] methods, such as SpaCy [<xref ref-type="bibr" rid="ref68">68</xref>] and BERT-based models, to extract essential symptom-related keywords from the tweets. The model was trained or fine-tuned using a labeled dataset specifically curated for health-related text, which included annotations for COVID-19 symptoms such as fever, cough, fatigue, and loss of smell or taste. For the definition of symptoms, we referred to the descriptions of symptoms within the systematized nomenclature of medicine clinical terms [<xref ref-type="bibr" rid="ref69">69</xref>] as shown in <xref ref-type="table" rid="table2">Table 2</xref>. To make our system operational, we deployed the trained model on a server. We used a script program to continuously collect COVID-19–related tweets from Twitter. The collected tweets were then analyzed using the deployed model, and the results were displayed weekly on the Covlab website. This provided users with up-to-date, analyzed information regarding COVID-19 development.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Symptoms and their expression found in self-reported tweets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="185"/>
            <col width="815"/>
            <thead>
              <tr valign="top">
                <td>Symptoms</td>
                <td>Descriptions (with their IDs in clinical terms)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Fever</td>
                <td>Febrile (386661006), fever (386661006), feverish (103001002), mill fever (85761009), hyperthermia (1197782006), hay fever (21719001), degrees Fahrenheit (258712004), temperature (722490005), high temperature (285717004), high body temperature (50177009), body temperature above reference range (50177009), increased body temperature (50177009), elevated temperature (50177009), raised temperature (50177009), increased skin temperature (17038008), feeling hot (373932008), feeling hot and cold (103002009), and feeling hot and sweaty (373939004)</td>
              </tr>
              <tr valign="top">
                <td>Chills</td>
                <td>Chills (43724002), chills and fever (274640006), chillness (43724002), shivering (43724002), shivering or rigors (248456009), rigor (38880002), brass founders’ ague (74800004), algor (425681008), shakes (26079004), shaking (26079004), trembling (267079009), cold (82272006), head cold (82272006), freeze (48103003), freezing (48103003), and frigid (48103003)</td>
              </tr>
              <tr valign="top">
                <td>Sweating</td>
                <td>Sweat (74616000), sweating (415691001), cold sweat (83547004), hot sweat (224962007), hemopoiesis (445961003), hidrosis (415691001), diaphoresis (52613005), perspiration (415691001), perspire (74616000), perspire profusely (74616000), started to perspire (74616000), perspire all over (74616000), perspire during sleep (74616000), excessive sweating (52613005), profuse sweating (52613005), and sweating profusely (52613005)</td>
              </tr>
              <tr valign="top">
                <td>Runny nose</td>
                <td>Sniffle (275280004), nose running (267101005), running nose (267101005), nose dripping (267101005), nasal discharge (267101005), and snotty (267101005)</td>
              </tr>
              <tr valign="top">
                <td>Nasal congestion</td>
                <td>Nasal congestion (68235000), congested nose (68235000), stuffed-up nose (68235000), congestion (85804007), stuffed-up nose (68235000), stuffed nose (68235000), rhinobyon (68235000), nasal obstruction (232209000), nasal airway obstruction (232209000), and stuffiness (232209000)</td>
              </tr>
              <tr valign="top">
                <td>Nosebleed</td>
                <td>Nosebleed (249366005), nose bleeds (249366005), nose bleeding (249366005), bleeding from nose (249366005), nosebleed (249366005), nasal hemorrhage (249366005), epistaxis (249366005), and nasal hemorrhage (249366005)</td>
              </tr>
              <tr valign="top">
                <td>Cough</td>
                <td>Cough (49727002), coughing (49727002), nonproductive cough (11833005), hacking cough (59994004), tussiculation, dry cough (11833005), persistent cough (284523002), acute cough (49727002), bad cough (49727002), coughing all night (161933007), evening cough (161933007), morning cough (161932002), coughing up blood (66857006), coughing and deep breathing (371605008), and begma (49727002)</td>
              </tr>
              <tr valign="top">
                <td>Headache</td>
                <td>Headache (25064002), migraine (37796009), sick headache (193028008), tension-type headache (398057008), and cluster-headache syndrome (193031009)</td>
              </tr>
              <tr valign="top">
                <td>Sneezing</td>
                <td>Sneeze (76067001), sneezing (76067001), sneezing symptom (162367006), sternutation (76067001), niesen (76067001), and achoo (76067001)</td>
              </tr>
              <tr valign="top">
                <td>Eye pain</td>
                <td>Eye pain (41652007), pain in eye (41652007), ocular pain (41652007), ocular headache (86925001), ocular dryness (162290004), dry eye (162290004), cephalalgia (25064002), diplopia (24982008), double vision (24982008), and eyelid edema (89091004)</td>
              </tr>
              <tr valign="top">
                <td>Loss of taste or smell</td>
                <td>Smell (397686008), taste (397627001), lost sense of smell (44169009), absent smell (44169009), sense of smell absent (44169009), anosmia (44169009), can’t smell (44169009), smell nothing (44169009), disorder of taste (399993004), loss of taste (36955009), absence of sense of taste (36955009), and ageusia (36955009)</td>
              </tr>
              <tr valign="top">
                <td>Sputum</td>
                <td>Sputum (45710003), expectoration (45710003), productive cough (28743005), productive cough–green sputum (161924005), productive cough–yellow sputum (161925006), and phlegm (52024008)</td>
              </tr>
              <tr valign="top">
                <td>Shortness of breath</td>
                <td>Respiratory disorder (50043002), respiratory disease (50043002), breath (11891009), shortness of breath (267036007), dyspnea (267036007), short breath (267036007), breathless (267036007), difficulty in breathing (230145002), breathing difficult (230145002), labored breathing (248549001), difficulty breath (230145002), and breathing painful (75483001)</td>
              </tr>
              <tr valign="top">
                <td>Sore throat</td>
                <td>Sore throat (267102003), throat sore (162388002), throat pain (162388002), pain in throat (162397003), and pain throat (162397003)</td>
              </tr>
              <tr valign="top">
                <td>Dizziness</td>
                <td>Dizzy (267102003), throat soreness (267102003), dizzy spells (315018008), and dizziness (404640003)</td>
              </tr>
              <tr valign="top">
                <td>Intolerance to light</td>
                <td>Intolerance of light (1285284009), photophobia (409668002), eye sensitive to light (1285284009), light sensitive (1285284009), and light sensitivity (1285284009)</td>
              </tr>
              <tr valign="top">
                <td>Hearing findings</td>
                <td>Pains of ears (301354004), earache (301354004), otalgia (301354004), ears pop (162346006), popping sensation in ear (162346006), tinnitus (60862001), and noise in ears (60862001)</td>
              </tr>
              <tr valign="top">
                <td>Loss of appetite</td>
                <td>Poor appetite (64379006), decrease in appetite (64379006), inappetence (64379006), lost my appetite (64379006), loss of appetite (79890006), no appetite (79890006), anorexia (79890006), and off food (79890006)</td>
              </tr>
              <tr valign="top">
                <td>Hallucinations</td>
                <td>Hallucinations (7011001), illusion (5152006), illusionary (5152006), auditory hallucination (45150006), visual hallucination (64269007), and see things (64269007)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Phase Difference Calculation</title>
        <p>Because it took time to provide the official report, the COVID-19 occurrence derived from Twitter (ie, predicted curve) was expected to be ahead of the official report dates (ie, actual curve). Our research used the Hilbert transform (HT) [<xref ref-type="bibr" rid="ref70">70</xref>] method to calculate the phase difference between the actual and predicted curves. HT is a method used for analyzing time-varying signals [<xref ref-type="bibr" rid="ref71">71</xref>]. It can transform a real-valued signal into a complex-valued signal, rendering it convenient for phase analysis. In signal analysis, HT is often used to calculate the instantaneous phase of a signal, which can be used to compare the phase difference between 2 signals. HT was selected for its ability to compute instantaneous phase and amplitude in the time domain, making it ideal for analyzing temporal alignment and detecting phase shifts between predicted and actual signals. Unlike Fourier transform, which is limited by global frequency components, HT offers a more accurate and intuitive method for capturing time-varying phase relationships. It is particularly useful for nonstationary signals, as it can analyze phase dynamics without assumptions of stationarity or periodicity. The phase spectrum of Fourier transform may have discontinuous jumps in some cases, which can lead to incorrect results when calculating phase differences. HT can accurately calculate the instantaneous phase of a signal and avoid this problem.</p>
        <p>The daily predicted cases curve <italic>f</italic>(t) and the daily actual cases curve <italic>g</italic>(t) share the same time sequence. To calculate the phase difference between them by performing the HT, we can first transform them into their complex-valued signals:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(1)</bold>
        </p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(2)</bold>
        </p>
        <p>Here, [H] represents the HT operator, and <italic>i</italic> is the imaginary unit. We can then calculate the instantaneous phase of each signal, usually using the <italic>arctan</italic> function to compute the phase angle:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(3)</bold>
        </p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(4)</bold>
        </p>
        <p>Finally, we subtracted the phase angles of the 2 signals to obtain the phase difference:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(5)</bold>
        </p>
        <p>In addition to calculating the phase difference between the 2 curves, we also conducted stationarity tests on both curves. Stationarity verification is an important step in time-series analysis and is used to determine whether a time series is stationary. We used 3 methods, the augmented Dickey-Fuller test [<xref ref-type="bibr" rid="ref72">72</xref>], the Kwiatkowski-Phillips-Schmidt-Shin test [<xref ref-type="bibr" rid="ref73">73</xref>], and the Phillips-Perron test [<xref ref-type="bibr" rid="ref74">74</xref>], to verify the stationarity of the 2 curves. We used the TimesNet [<xref ref-type="bibr" rid="ref75">75</xref>] approach from prior research to predict the current trends in COVID-19 development based on the time-series relationships between the self-reported case numbers and the actual case numbers.</p>
      </sec>
      <sec>
        <title>Evaluation of the Model</title>
        <p>Our evaluation of the model used the following methodology. True positive is the number of samples correctly predicted as positive, false positive is the number of samples incorrectly predicted as positive, true negative is the number of samples correctly predicted as negative, and false negative is the number of samples incorrectly predicted as negative. Precision is the proportion of correct positive predictions among all positive predictions. Precision is defined as follows:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(6)</bold>
        </p>
        <p>Recall is the proportion of actual positive samples that are correctly predicted as positive. Recall is defined as follows:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(7)</bold>
        </p>
        <p>Accuracy is defined as the percentage of correctly predicted results out of the total sample.</p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(8)</bold>
        </p>
        <p><italic>F</italic><sub>1</sub>-score is defined as follows:</p>
        <disp-formula>
          <graphic xlink:href="jmir_v27i1e63190_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>
          <bold>(9)</bold>
        </p>
        <p>To measure the performance under the unbalanced data distribution, in this work, we used the precision-recall (PR) curve and ROC curve to display the performance. The ROC curve is a curve of sensitivity versus 1–specificity on all possible prediction thresholds. Similarly, the PR curve plots precision versus recall on all possible prediction thresholds. In addition, average precision (AP) and area under the curve (AUC), derived from the PR curve and the ROC curve, respectively, are also generated for quantitative comparisons in this work.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Model Performance for Self-Reporting COVID-19 Cases</title>
        <p>We evaluated the models with AUC, AUC-PR, accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score. RoBERTa and BERT achieved the best performance with an AUC of 0.98 and an AP of 0.97, as depicted in <xref rid="figure2" ref-type="fig">Figure 2</xref>. Notably, all LLMs outperformed traditional machine learning models in AUC and AP, exhibiting an AUC gain from 0.01 to 0.10 and an AP gain from 0.07 to 0.09. According to the benchmark results in <xref ref-type="table" rid="table3">Table 3</xref>, BLOOM performed the best compared with other models in accuracy and precision with 0.948 and 0.941, whereas the SVM outperformed others in recall and <italic>F</italic><sub>1</sub>-score with 0.9362 and 0.9329, respectively. Combining AUC, accuracy, and recall, we believe that the BLOOM model has the best performance. Subsequently, we used the trained model to assist us in selecting self-reported positive tweets from all COVID-19–related tweets.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Performance of various machine learning methods. (A) Receiver operating characteristic (ROC) curve with area values. The area under the curve (AUC) values, provided in the legend, quantify the overall performance of the models, with higher values indicating superior discriminative ability. (B) Precision-recall curve with average precision. AP: average precision; BERT: Bidirectional Encoder Representations from Transformers; BLOOM: Bigscience Large Open-science Open-access Multilingual language model; Llama-2: large language model meta AI 2; LR: logistic regression; NB: naive Bayes; RoBERTa: robustly optimized BERT pretraining approach; SVM: support vector machine; XLNet: extreme language model.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e63190_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance comparison of various machine learning models and large language models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="31"/>
            <col width="331"/>
            <col width="0"/>
            <col width="167"/>
            <col width="0"/>
            <col width="167"/>
            <col width="0"/>
            <col width="136"/>
            <col width="0"/>
            <col width="168"/>
            <thead>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Accuracy (%)</td>
                <td colspan="2">Precision (%)</td>
                <td colspan="2">Recall (%)</td>
                <td><italic>F</italic><sub>1</sub>-score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="10">
                  <bold>Machine learning</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Naive Bayes [<xref ref-type="bibr" rid="ref55">55</xref>]</td>
                <td colspan="2">86.86</td>
                <td colspan="2">65.62</td>
                <td colspan="2">52.43</td>
                <td colspan="2">58.29</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Logistic regression [<xref ref-type="bibr" rid="ref57">57</xref>]</td>
                <td colspan="2">92.57</td>
                <td colspan="2">91.73</td>
                <td colspan="2">92.64</td>
                <td colspan="2">92.18</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Support vector machine [<xref ref-type="bibr" rid="ref56">56</xref>]</td>
                <td colspan="2">93.62</td>
                <td colspan="2">92.96</td>
                <td colspan="2">
                  <italic>93.62</italic>
                </td>
                <td colspan="2">
                  <italic>93.29</italic>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Large language models</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT<sup>a</sup> [<xref ref-type="bibr" rid="ref49">49</xref>]</td>
                <td colspan="2">93.80</td>
                <td colspan="2">92.50</td>
                <td colspan="2">91.00</td>
                <td colspan="2">91.70</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RoBERTa<sup>b</sup> [<xref ref-type="bibr" rid="ref58">58</xref>]</td>
                <td colspan="2">93.60</td>
                <td colspan="2">91.20</td>
                <td colspan="2">91.80</td>
                <td colspan="2">91.50</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XLNet<sup>c</sup> [<xref ref-type="bibr" rid="ref59">59</xref>]</td>
                <td colspan="2">93.00</td>
                <td colspan="2">90.40</td>
                <td colspan="2">91.30</td>
                <td colspan="2">90.80</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GPT-2 [<xref ref-type="bibr" rid="ref60">60</xref>]</td>
                <td colspan="2">94.30</td>
                <td colspan="2">92.00</td>
                <td colspan="2">91.00</td>
                <td colspan="2">92.60</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BLOOM<sup>d</sup> [<xref ref-type="bibr" rid="ref61">61</xref>]</td>
                <td colspan="2">
                  <italic>94.80</italic>
                </td>
                <td colspan="2">
                  <italic>94.10</italic>
                </td>
                <td colspan="2">92.00</td>
                <td colspan="2">93.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Llama-2<sup>e</sup> (7b) [<xref ref-type="bibr" rid="ref62">62</xref>]</td>
                <td colspan="2">94.20</td>
                <td colspan="2">93.30</td>
                <td colspan="2">91.30</td>
                <td colspan="2">92.30</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>RoBERTa: robustly optimized BERT pretraining approach.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>XLNet: extreme language model.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>BLOOM: Bigscience Large Open-science Open-access Multilingual language model.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>Llama-2: large language model meta AI 2.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Predicted Trend of COVID-19 Cases</title>
        <p>We used the trained BLOOM model to evaluate all the COVID-19–related tweets collected from January 1, 2020, to April 1, 2023, and extracted the tweets that the model predicted as self-reported positive cases of COVID-19. Among 7.3 million COVID-19–related tweets, we identified 317,500 self-reported tweets. Using unique user IDs, we considered multiple tweets reporting a COVID-19 diagnosis by the same user to be a single case, resulting in 262,278 unique self-reported cases. The IDs of these unique confirmed users have been stored separately in an in-house database named the COVID-19 patient database (CPD), and the daily predicted number of confirmed cases by the model has been stored according to Coordinated Universal Time.</p>
        <p>To compare the predicted daily case counts with the actual daily case counts, we obtained the actual daily case counts from public platforms such as Johns Hopkins University and <italic>The New York Times</italic>. Then, we plotted the actual daily case count and predicted case count on a curve, as shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. The blue line represents the daily actual case counts, and the red line represents our predicted case counts. The red text in the figure describes key events during the outbreak, and the brown text represents the time the variant appeared. We used the HT method to calculate the phase difference between the two curves and found that the predicted curve was leading the actual curve by approximately 7.63 days. The Augmented Dickey-Fuller test results indicated values below the critical values of 1%, 5%, and 10%, accompanied by simultaneous <italic>P</italic> values of &#60;.001 and &#60;.001, which rejects the hypothesis of the existence of a unit root. In addition, both the Phillips-Perron and Kwiatkowski-Phillips-Schmidt-Shin tests exhibited <italic>P</italic> values &#60;.05. Collectively, the outcomes from these 3 tests consistently pointed toward the stationarity of the time series under scrutiny as shown in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>.</p>
        <p>There are 2 distinct peaks in the red curve. We examined the data for the first peak of the predicted curve on October 2, 2020, and the second peak on November 21, 2020. We found that the first peak on October 2, 2020, was due to then US President Donald Trump’s tweet announcing his positive COVID-19 test result, which triggered many Twitter users to also report their self-diagnosed cases on Twitter. On that day, there were 1495 tweets related to self-reported positive cases. As for the second peak in the prediction curve, we examined the relevant tweets on that day and found that most of them were related to the US election results. Many users tweeted about their infection status and discussed the US epidemic-prevention policies. Of all the tweets on that day, 973 were related to self-reported cases of COVID-19 infection.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Real cases and predicted cases curves. The blue curve represents the actual daily confirmed cases, and the red curve represents the daily predicted cases. The shaded areas above and below the red and blue curves represent the CI. The red text represents key events during the outbreak, and the brown text represents the time the variant appeared. The blue shaded area on the right side represents the period during which actual confirmed case data are missing. The solid red line represents the daily self-reported COVID-19 infection numbers, and the blue dotted line represents the predicted actual infection numbers. JHU: Johns Hopkins University; WHO: World Health Organization.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e63190_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Symptoms</title>
        <p>We extracted the historical tweets of users from the CPD using their unique user IDs. The temporal scope of these tweets spanned a period from 1 month preceding the self-reported date of symptom onset to 9 months after the diagnosis date, encompassing a total duration of 10 months. Within a cohort of 24,316 reporting COVID-19 symptoms, an analysis of historical tweets identified the top 10 symptoms. <xref rid="figure4" ref-type="fig">Figure 4</xref>A plots the temporal frequency of COVID-19 symptom mentions, providing insight into how symptom prevalence evolved over time. Notably, <italic>fever</italic>, <italic>headache</italic>, <italic>fatigue</italic>, and <italic>cough</italic> emerged as consistently common symptoms. The trends observed in these symptoms closely parallel the overall trend in confirmed COVID-19 cases. We also identified that the onset of symptoms such as <italic>loss of taste or smell</italic> and <italic>shortness of breath</italic> first became prominent in September 2020, possibly correlating with the emergence of the Beta variant. Similarly, the prevalence of <italic>sore throat</italic> spiked in late 2021, potentially aligning with the rise of the Omicron variant. The symptom <italic>difficulty breathing</italic> maintained a steady presence across the timeline. Notably, less commonly reported symptoms, such as <italic>hallucinations</italic> and <italic>eye pain</italic>, not currently recognized by the US Centers for Disease Control and Prevention (CDC), appeared sporadically in user reports, suggesting their rarity in COVID-19 cases. 
As shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>B, the most frequently mentioned symptoms were <italic>fever</italic> (11,613/24,316, 47.76% mentioned), <italic>headache</italic> (8347/24,316, 34.33%), <italic>cough</italic> (7985/24,316, 32.84%), <italic>generalized body aches</italic> (6893/24,316, 28.35%), <italic>difficulty breathing</italic> (6169/24,316, 25.37%), <italic>fatigue</italic> (5984/24,316, 24.61%), <italic>pain</italic> (5806/24,316, 23.88%), <italic>disorder of smell and taste</italic> (5444/24,316, 22.39%), and <italic>sore throat</italic> (5082/24,316, 20.9%), listed in descending order. Notably, all symptoms except for <italic>eye pain</italic> (2541/24,316, 10.45%) aligned with those recognized by the CDC. Additional symptoms, such as <italic>lethargy</italic> (2176/24,316, 8.95%), <italic>dizziness</italic> (1451/24,316, 5.97%), and <italic>hallucinations</italic> (1086/24,316, 4.47%), although mentioned by a minority group, are not currently acknowledged as COVID-19 symptoms by the CDC.</p>
        <p>In the dataset of historical tweets from diagnosed individuals, we observed instances in which a patient mentioned multiple symptoms concurrently. To quantify this, we calculated the frequency of cooccurrence of any 2 symptoms and constructed a dependency graph to illustrate these relationships. <xref rid="figure4" ref-type="fig">Figure 4</xref>C elucidates the correlations among various symptoms, highlighting that most individuals with the infection reported experiencing a constellation of related symptoms, such as headache, cough, and fever. Furthermore, <xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref> presents a heat map that visualizes the Pearson correlation coefficients [<xref ref-type="bibr" rid="ref76">76</xref>] among these symptoms, offering a quantitative view of their interdependencies. To visually represent the range of self-reported symptoms, we used a word cloud. This graphical representation provides an immediate overview of the symptomatology as expressed by the users in our dataset.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>All symptoms mentioned by self-reporting tweets and the correlations among symptoms. (A) represents the number of mentions of COVID-19 symptoms in self-reported tweets over time. (B) represents the percentage of symptoms in all self-reporting COVID tweets, and multiple symptoms can be mentioned in 1 tweet. (C) represents the correlations among symptoms mentioned by the same user. The width of the line between 2 symptoms represents the number of tweets that mention both symptoms.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e63190_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Reinfection and Rehabilitation</title>
        <p>We analyzed historical tweets from users who self-reported COVID-19 infections, identifying 723 individuals who shared their recovery experiences. The annual breakdown of these individuals is as follows: 174 in 2020, 163 in 2021, 135 in 2022, and 251 in 2023. The duration of recovery was primarily inferred from the period mentioned in their tweets. In instances in which the recovery period was not explicitly stated, we computed it by calculating the interval between the date of confirmed diagnosis and the date of reported recovery. The data on self-reported recovery durations have been depicted through the Kaplan-Meier recovery curve [<xref ref-type="bibr" rid="ref77">77</xref>], as illustrated in <xref rid="figure5" ref-type="fig">Figure 5</xref>A. This graphical representation reveals a gradual decrease in recovery time for patients with COVID-19 from 2020 to 2023. Specifically, in 2020, most patients reported a recovery period of around 30 days, with very few recovering in &#60;30 days. In contrast, by 2023, the trend had shifted significantly, with most individuals reporting a recovery within approximately 12 days, despite a minority group still experiencing recovery periods extending beyond 30 days. <xref rid="figure5" ref-type="fig">Figure 5</xref>B presents a comprehensive overview of the evolution of recovery periods from 2020 to 2023. In addition, this figure suggests that the prevalent COVID-19 cases in 2023 were predominantly mild, indicating a possible decrease in the virulence of the virus over time.</p>
        <p>In this study, we defined a recurrent COVID-19 infection in an individual as a self-reported reinfection occurring &#62;30 days after the initial confirmed positive diagnosis. We meticulously tracked the historical tweets of all confirmed patients in CPD. <xref rid="figure5" ref-type="fig">Figure 5</xref>C presents the distribution of the intervals between the first and second infections alongside the corresponding case counts. This analysis reveals a relatively low likelihood of a repeat infection within 180 days. Most patients who had recovered from an initial infection reported a second infection approximately 260 days later. Moreover, repeat infections occurring between 300 and 600 days after recovery were also relatively frequent. The longest interval between repeat infections documented in our study extended to 720 days. Of 262,278 patients who self-reported a positive COVID-19 test result, 91.12% (n=238,993) indicated a single infection event, and 6.83% (n=17,906) reported 2 infections. A smaller subset, comprising 1.25% (n=3283) of individuals, reported 3 infections; 0.39% (n=1025) indicated 4 infections; 0.17% (n=445) reported 5 infections; 0.11% (n=301) of individuals reported 6 infections; and 0.12% (n=325) indicated ≥7 infections. Remarkably, the highest number of reported reinfections was 9, with 7 individuals documenting their ninth infection. <xref rid="figure5" ref-type="fig">Figure 5</xref>D shows that among the 238,993 patients with a single infection, 7.49% (17,906/238,993) reported a second infection. A further breakdown shows that 1.37% (3283/238,993) reported a third infection, 0.43% (1025/238,993) reported a fourth infection, and 0.45% (1071/238,993) reported experiencing ≥5 infections. We also performed a statistical analysis of the time intervals between infections among users with multiple infections.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Overview of reinfections and recovery. (A) Kaplan-Meier estimates of cumulative recoveries. (B) Rehabilitation days in different years. (C) Time to reinfection for 238,993 individuals. (D) Reinfection cases and rates.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e63190_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Distribution of Cases</title>
        <p>We extracted the geographic locations of the diagnosed users in CPD, and the distribution of all confirmed patients is shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>A [<xref ref-type="bibr" rid="ref52">52</xref>]. California had the highest number of self-reported COVID-19 cases, with 8762 cases, followed by Texas (6619 cases), Florida (4245 cases), New York (3566 cases), Illinois (2649 cases), Pennsylvania (2032 cases), Ohio (1868 cases), Massachusetts (1793 cases), Georgia (1785 cases), and Michigan (1677 cases), in descending order. Alabama and Northern Mariana Islands had the least data, with only 1 self-reported case in each state.</p>
        <p>The details of confirmed cases in each state are shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>B, in which we provide the average case counts for the entire country and each state as well as the number of self-reported cases per 10 million people per week, the trend over the past 2 weeks, and the positivity rate of each state. For example, we can see that California had the most self-reported COVID-19 cases in the past week, with at least 29 people reporting positive test results. Approximately 7.34 people per 10 million reported self-diagnosed COVID-19–positive status, and the average positivity rate of self-reported cases was 26.69%. However, compared with the previous 2 weeks, the number of self-reported COVID-19 cases decreased by 10.93%. Due to insufficient data, the trend of changes in the past 14 days was unavailable for several states, such as the Virgin Islands and Wyoming.</p>
        <p>We also plotted the time-varying curves for the confirmed cases in the top 20 states in terms of confirmed cases, as shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>C and <xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>. It is evident that the changes in the number of confirmed cases in the top 4 states with the highest number of cases closely resemble the overall trend in the United States. Some states, such as Washington, Arizona, Washington DC, and Indiana, exhibited relatively consistent changes in the number of confirmed cases over time, whereas states such as Nevada, Colorado, Alabama, and Michigan had less-consistent curves, with some dates showing no reported cases. The variation in results could have been influenced by the differing numbers of Twitter users in each state.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Cases across the United States and by state. (A) Self-reported number of infections in each state, with darker colors representing higher numbers. (B) Self-reported weekly number of infections, number of infections per 10 million people, change in trend over the previous 2 weeks, and positivity rate for a subset of states (alphabetical order). (C) Infection curve for the top 20 states with the highest infection numbers.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e63190_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Covlab Visualization Website</title>
        <p>To disseminate timely information to the public and policy makers, we have launched a visualization website, Covlab, which features our trained models and a comprehensive data pipeline. This platform consists of an automated script sequence designed to update the data weekly. The home page of Covlab offers users real-time access to the total number of tweets collected to date, including those self-reported as COVID-19–positive. On the Graphs page, users can explore the most recent weekly growth trends in COVID-19 cases alongside predictive models of actual confirmed cases. This section also enables monitoring of infection trends across various states in the United States as well as the prevailing symptom patterns observed to the current date. Furthermore, Covlab displays the proportions of reinfections and recovery periods, with these metrics also being refreshed weekly. The website’s dynamic graphs and tables serve as a valuable resource, providing the public with up-to-date information on the ongoing evolution of COVID-19, including insights into the symptomatic expressions of emerging variants.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The analysis of self-reported COVID-19 tweets offers a valuable perspective, reflecting the actual progression of the pandemic to a considerable degree. The voluminous data generated by self-reporting individuals on Twitter augment clinical datasets, offering a complementary avenue for long-term observation and tracking. This approach is particularly beneficial in bridging the data gap inherent in in-home self-testing scenarios. Our research indicates that reinfections are relatively common, with the likelihood of multiple infections diminishing over time. Moreover, this dataset has proven instrumental in identifying potential new symptoms that were not initially associated with COVID-19, such as lethargy and hallucinations, offering an early warning system for evolving clinical presentations. In addition, the approach has enabled the detection of emerging variants through geospatial clustering of self-reported cases exhibiting distinctive symptom combinations. This tracking capability enhances preparedness and enables timely public health interventions. The breakdown by state of self-reported COVID-19 tweets facilitates more efficient tracking of disease trends. Furthermore, the approach used in this study could potentially serve as a general pipeline for researching and analyzing other infectious diseases.</p>
      </sec>
      <sec>
        <title>Symptoms and Sequelae Study</title>
        <p>Within the subset of Twitter users who self-reported as COVID-19 positive, numerous accounts contained detailed descriptions of symptoms experienced during the infection. These firsthand accounts are a rich source of information. We extracted and analyzed the symptomatology mentioned in these tweets, compiling a list of the most prevalent symptoms reported by patients. Intriguingly, our findings closely mirror the commonly acknowledged COVID-19 symptoms listed by the CDC. However, some symptoms mentioned by a subset of patients in their tweets are not yet recognized as typical by the CDC. This discrepancy highlights the significant role of our study in identifying potential new symptoms of COVID-19 that have not been widely recognized. Such findings could provide valuable guidance for further clinical investigation into these symptoms and their association with different COVID-19 variants. In addition, the CPD established through this study offers a robust framework for long-term patient tracking. This database not only complements existing clinical data but also provides an invaluable resource for the study of post–COVID-19 sequelae and post–COVID-19 condition symptoms, thereby enhancing our understanding of the virus’s long-term impacts.</p>
      </sec>
      <sec>
        <title>Comparison With Traditional Tracking Tools</title>
        <p>Traditional clinical data–driven tools, while offering high specificity by relying on confirmed diagnoses, often underrepresent asymptomatic or mild cases. In contrast, our approach leverages self-reported data from social media platforms, such as Twitter, to capture a broader spectrum of cases, including those not reported in clinical settings. Although this may introduce noise, advanced filtering and NLP techniques effectively mitigate inaccuracies, ensuring meaningful insights. Furthermore, clinical data collection and reporting are often delayed due to testing and diagnosis, whereas our tool enables near–real-time monitoring by analyzing self-reported cases as they are posted, facilitating rapid identification of trends and potential hot spots. In terms of coverage, traditional clinical tracking tools may lack granularity, particularly in regions with limited health care infrastructure or reporting capabilities. Using social media, our tool significantly enhances coverage, incorporating underrepresented populations and geographies to provide a more comprehensive view of the pandemic’s spread. A key contribution of this approach is the integration of self-reported data, which captures personal narratives, symptom progression, and public sentiment, thereby complementing clinical data to offer a richer and more dynamic understanding of the pandemic. Importantly, while our tool excels in timeliness and coverage, its true potential lies in synergy with clinical data–driven tools. We propose future efforts to integrate these approaches, enabling cross-validation and improving overall accuracy.</p>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <p>Although our method and website are highly useful, there are some limitations. First, Twitter has a limited quota of public APIs, which renders the platform hugely expensive to run. Second, potential biases in data collection and the varying distribution of Twitter users across different states may impact the predictive accuracy. Furthermore, external factors, particularly notable events in the United States that occurred in 2020, influenced our prediction outcomes. For instance, the 2 notable peaks in self-reported cases observed in 2020 did not necessarily correlate with an actual increase in infections. Instead, these peaks were primarily driven by external events, prompting a surge in infection reporting on Twitter on those specific days. Another significant constraint is the veracity of the information in tweets. However, despite their questionable reliability, these data offer informative trend analyses and hypotheses valuable for future research and validation. Our framework demonstrates significant versatility, with potential applications extending beyond COVID-19 to monitor other self-reported illnesses, such as influenza and respiratory syncytial virus, as well as chronic conditions such as diabetes and mental health issues. Its modular design facilitates easy adaptation by allowing the customization of keywords and analytical strategies tailored to specific diseases. To enhance its functionality, we propose integrating Twitter data with other real-time health data sources, such as wearable devices and electronic health records, enabling a more comprehensive approach to health trend monitoring. In this integration, social media signals would serve as an initial screening mechanism, effectively complementing clinical data. In future work, we plan to integrate self-reported data from other social platforms, such as Reddit, to reduce data limitations and biases, thereby broadening the scope of our analysis. 
This expansion will support the development of a platform capable of predicting COVID-19 trends in real time based on self-reported content. Furthermore, we aim to apply this pipeline to other infectious diseases that may emerge in the future, facilitating the understanding and tracking of their development and trends. To maintain robustness and accuracy, we plan periodic retraining of the model using updated data and incorporating LLMs to better capture nuanced expressions in self-reported content. A user feedback mechanism will also be implemented to address false positives and negatives. In addition, we aim to expand the tool’s global applicability by automating data pipelines to support content in multiple languages, with a particular focus on underrepresented regions. This global expansion will enable the tool to capture disease trends across diverse geographic and cultural contexts, offering a more holistic view of global health dynamics.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study demonstrates the significant potential of using self-reported COVID-19 tweets from social media platforms for public health monitoring. By leveraging machine learning and NLP techniques, we developed a tool capable of identifying infection and recovery trends, providing valuable insights into disease spread and public behavior. Our findings contribute to the growing field of digital epidemiology, emphasizing that social media can serve as an effective complementary data source to traditional public health surveillance systems. Beyond COVID-19, this approach holds promise for monitoring other infectious diseases, mental health conditions, and chronic illnesses by adapting the model to new health-related keywords and contexts. Integrating such tools with existing health infrastructure could enhance early detection, improve situational awareness, and enable more proactive public health responses. However, the ethical considerations of using publicly available data and addressing biases inherent to social media platforms must be prioritized to ensure responsible use. Overall, this work highlights the evolving role of digital tools in public health informatics and presents opportunities for future research to further refine these methods. The integration of social media–based monitoring with traditional data systems could transform public health strategies, making them more adaptive, responsive, and inclusive of diverse population segments.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>A list of keywords and hashtags used in data collection.</p>
        <media xlink:href="jmir_v27i1e63190_app1.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Stop word list.</p>
        <media xlink:href="jmir_v27i1e63190_app2.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Interannotator agreement metrics.</p>
        <media xlink:href="jmir_v27i1e63190_app3.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Data preprocessing process and annotation system.</p>
        <media xlink:href="jmir_v27i1e63190_app4.docx" xlink:title="DOCX File , 3873 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>An example of the target cohort.</p>
        <media xlink:href="jmir_v27i1e63190_app5.docx" xlink:title="DOCX File , 277 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Number of parameters for conventional machine learning and large language models.</p>
        <media xlink:href="jmir_v27i1e63190_app6.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Configuration for conventional machine learning and large language models.</p>
        <media xlink:href="jmir_v27i1e63190_app7.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app8">
        <label>Multimedia Appendix 8</label>
        <p>Multiple stationarity verification methods and their results.</p>
        <media xlink:href="jmir_v27i1e63190_app8.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app9">
        <label>Multimedia Appendix 9</label>
        <p>Heat map of Pearson correlation coefficients among symptoms and symptoms word cloud.</p>
        <media xlink:href="jmir_v27i1e63190_app9.docx" xlink:title="DOCX File , 554 KB"/>
      </supplementary-material>
      <supplementary-material id="app10">
        <label>Multimedia Appendix 10</label>
        <p>Infection curve for the top 20 states with the highest infection numbers.</p>
        <media xlink:href="jmir_v27i1e63190_app10.docx" xlink:title="DOCX File , 291 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AP</term>
          <def>
            <p>average precision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">BLOOM</term>
          <def>
            <p>Bigscience Large Open-science Open-access Multilingual language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">CPD</term>
          <def>
            <p>COVID-19 patient database</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">HT</term>
          <def>
            <p>Hilbert transform</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">Llama-2</term>
          <def>
            <p>large language model meta AI 2</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">LoRA</term>
          <def>
            <p>low-rank adaptation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PCR</term>
          <def>
            <p>polymerase chain reaction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">PR</term>
          <def>
            <p>precision-recall</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">RoBERTa</term>
          <def>
            <p>robustly optimized Bidirectional Encoder Representations from Transformers pretraining approach</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by the National Institute of General Medical Sciences of the National Institutes of Health (R35-GM126985). The authors would like to thank all the annotators who participated in the calibration of their data, including Kent Studer, James Tipton, Thadeus Meneses, Noah Berry, Sai Akhil Chopparapu, Trinath Adusumilli, Weinan Zhang, Yijie Ren, Li Su, Chunhui Xu, and Congyu Guo.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The aggregated datasets analyzed in this study are available from the corresponding author upon request. Manual labeling data can be viewed through the annotation website. Guest users can use the username <italic>guest</italic> and password <italic>guest</italic> to log into the system for data access. Actual and predicted daily cases are publicly available on the Covlab website [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>JX and ZZ collected the data and performed the analyses. JH created the data visualizations. SZ ran the large language model and wrote the model performance portion. GA, XT, and XW reviewed and assessed the results. LJ and YY provided technical support. JX drafted the manuscript and designed the website; JX and ZZ wrote the code and conducted all the tests. DX conceived and supervised the study. All authors had full access to all the data in the study as needed and had final responsibility for the decision to submit for publication.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Statement on the fifteenth meeting of the IHR (2005) Emergency Committee on the COVID-19 pandemic</article-title>
          <source>World Health Organization</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tinyurl.com/nhk5m9y8">https://tinyurl.com/nhk5m9y8</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>COVID-19 data</article-title>
          <source>Johns Hopkins</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://coronavirus.jhu.edu/map.html">https://coronavirus.jhu.edu/map.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>JX</given-names>
            </name>
            <name name-style="western">
              <surname>Cram</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Challenges and dynamics of public health reporting and data exchange during COVID-19: insights from US hospitals</article-title>
          <source>Health Aff Sch</source>
          <year>2024</year>
          <month>01</month>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>qxad080</fpage>
          <pub-id pub-id-type="doi">10.1093/haschl/qxad080</pub-id>
          <pub-id pub-id-type="medline">38756405</pub-id>
          <pub-id pub-id-type="pii">qxad080</pub-id>
          <pub-id pub-id-type="pmcid">PMC10986213</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mavragani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gkillas</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 predictability in the United States using Google Trends time series</article-title>
          <source>Sci Rep</source>
          <year>2020</year>
          <month>11</month>
          <day>26</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>20693</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-020-77275-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-020-77275-9</pub-id>
          <pub-id pub-id-type="medline">33244028</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-020-77275-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC7692493</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <article-title>Social media and attitudes towards a COVID-19 vaccination: a systematic review of the literature</article-title>
          <source>eClinicalMedicine</source>
          <access-date>2025-01-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.thelancet.com/journals/eclinm/article/PIIS2589-5370(22)00184-5/fulltext">https://www.thelancet.com/journals/eclinm/article/PIIS2589-5370(22)00184-5/fulltext</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hohl</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>She</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gruebner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Social media mining under the COVID-19 context: progress, challenges, and opportunities</article-title>
          <source>Int J Appl Earth Obs Geoinf</source>
          <year>2022</year>
          <month>09</month>
          <volume>113</volume>
          <fpage>102967</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36035895"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jag.2022.102967</pub-id>
          <pub-id pub-id-type="medline">36035895</pub-id>
          <pub-id pub-id-type="pii">S1569-8432(22)00160-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC9391053</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Kunatharaju</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Automatically identifying self-reports of COVID-19 diagnosis on Twitter: an annotated data set, deep neural network classifiers, and a large-scale cohort</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>07</month>
          <day>03</day>
          <volume>25</volume>
          <fpage>e46484</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e46484/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/46484</pub-id>
          <pub-id pub-id-type="medline">37399062</pub-id>
          <pub-id pub-id-type="pii">v25i1e46484</pub-id>
          <pub-id pub-id-type="pmcid">PMC10365612</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wickersham</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zain</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ciccarelli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Cancer and COVID-19 vaccines on Twitter: the voice and vaccine attitude of cancer community</article-title>
          <source>J Health Commun</source>
          <year>2023</year>
          <month>01</month>
          <day>02</day>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/abs/10.1080/10810730.2023.2168800"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/10810730.2023.2168800</pub-id>
          <pub-id pub-id-type="medline">36755484</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Trend and co-occurrence network of COVID-19 symptoms from large-scale social media data: infoveillance study</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>03</month>
          <day>14</day>
          <volume>25</volume>
          <fpage>e45419</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e45419/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/45419</pub-id>
          <pub-id pub-id-type="medline">36812402</pub-id>
          <pub-id pub-id-type="pii">v25i1e45419</pub-id>
          <pub-id pub-id-type="pmcid">PMC10131634</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zaidi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Samon</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jama</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gopalakrishnan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Karunasekera</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kashima</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Topics in antivax and provax discourse: yearlong synoptic study of COVID-19 vaccine tweets</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>08</month>
          <day>08</day>
          <volume>25</volume>
          <fpage>e45069</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e45069/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/45069</pub-id>
          <pub-id pub-id-type="medline">37552535</pub-id>
          <pub-id pub-id-type="pii">v25i1e45069</pub-id>
          <pub-id pub-id-type="pmcid">PMC10411425</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Storey</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>O'Leary</surname>
              <given-names>DE</given-names>
            </name>
          </person-group>
          <article-title>Text analysis of evolving emotions and sentiments in COVID-19 Twitter communication</article-title>
          <source>Cognit Comput</source>
          <year>2022</year>
          <month>07</month>
          <day>28</day>
          <fpage>1</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35915743"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s12559-022-10025-3</pub-id>
          <pub-id pub-id-type="medline">35915743</pub-id>
          <pub-id pub-id-type="pii">10025</pub-id>
          <pub-id pub-id-type="pmcid">PMC9330938</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Banda</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Tekumalla</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Artemova</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tutubalina</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chowell</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A large-scale COVID-19 Twitter chatter dataset for open scientific research—an international collaboration</article-title>
          <source>Epidemiologia (Basel)</source>
          <year>2021</year>
          <month>08</month>
          <day>05</day>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>315</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=epidemiologia2030024"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/epidemiologia2030024</pub-id>
          <pub-id pub-id-type="medline">36417228</pub-id>
          <pub-id pub-id-type="pii">epidemiologia2030024</pub-id>
          <pub-id pub-id-type="pmcid">PMC9620940</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naseem</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Razzak</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Khushi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eklund</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>COVIDSenti: a large-scale benchmark Twitter data set for COVID-19 sentiment analysis</article-title>
          <source>IEEE Trans Comput Soc Syst</source>
          <year>2021</year>
          <month>08</month>
          <volume>8</volume>
          <issue>4</issue>
          <fpage>1003</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.1109/tcss.2021.3051189</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Salathé</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kummervold</surname>
              <given-names>PE</given-names>
            </name>
          </person-group>
          <article-title>COVID-Twitter-BERT: a natural language processing model to analyse COVID-19 content on Twitter</article-title>
          <source>Front Artif Intell</source>
          <year>2023</year>
          <volume>6</volume>
          <fpage>1023281</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36998290"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/frai.2023.1023281</pub-id>
          <pub-id pub-id-type="medline">36998290</pub-id>
          <pub-id pub-id-type="pmcid">PMC10043293</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alqurashi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Alhindi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alanazi</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Large Arabic Twitter dataset on COVID-19</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online April 9, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2004.04315"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2004.04315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imran</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Qazi</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Ofli</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>TBCOV: two billion multilingual COVID-19 tweets with sentiment, entity, geo, and gender labels</article-title>
          <source>Data</source>
          <year>2022</year>
          <month>01</month>
          <day>10</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>8</fpage>
          <pub-id pub-id-type="doi">10.3390/data7010008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lakamana</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hogg-Bremer</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Garadi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Self-reported COVID-19 symptoms on Twitter: an analysis and a research resource</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>08</month>
          <day>01</day>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>1310</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32620975"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa116</pub-id>
          <pub-id pub-id-type="medline">32620975</pub-id>
          <pub-id pub-id-type="pii">5867237</pub-id>
          <pub-id pub-id-type="pmcid">PMC7337747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Radloff</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Wawrzynski</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Cloyes</surname>
              <given-names>KG</given-names>
            </name>
          </person-group>
          <article-title>Mining Twitter to explore the emergence of COVID-19 symptoms</article-title>
          <source>Public Health Nurs</source>
          <year>2020</year>
          <month>11</month>
          <volume>37</volume>
          <issue>6</issue>
          <fpage>934</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32937679"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/phn.12809</pub-id>
          <pub-id pub-id-type="medline">32937679</pub-id>
          <pub-id pub-id-type="pmcid">PMC8080690</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Purushothaman</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nali</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bardier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cuomo</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Machine learning to detect self-reporting of symptoms, testing access, and recovery associated with COVID-19 on Twitter: retrospective big data infoveillance study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>06</month>
          <day>08</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19509</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e19509/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19509</pub-id>
          <pub-id pub-id-type="medline">32490846</pub-id>
          <pub-id pub-id-type="pii">v6i2e19509</pub-id>
          <pub-id pub-id-type="pmcid">PMC7282475</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Work from home during the COVID-19 pandemic: an observational study based on a large geo-tagged COVID-19 Twitter dataset (UsaGeoCov19)</article-title>
          <source>Inf Process Manag</source>
          <year>2022</year>
          <month>03</month>
          <volume>59</volume>
          <issue>2</issue>
          <fpage>102820</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34903906"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2021.102820</pub-id>
          <pub-id pub-id-type="medline">34903906</pub-id>
          <pub-id pub-id-type="pii">S0306-4573(21)00293-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC8656435</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rusli</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nordin</surname>
              <given-names>NZ</given-names>
            </name>
            <name name-style="western">
              <surname>Ak Matusin</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Yusof</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Rosley</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Mohd Hussain</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Abu Bakar</surname>
              <given-names>SZ</given-names>
            </name>
          </person-group>
          <article-title>Geospatial mapping of suicide-related tweets and sentiments among Malaysians during the COVID-19 pandemic</article-title>
          <source>Big Data Cogn Comput</source>
          <year>2023</year>
          <month>03</month>
          <day>28</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>63</fpage>
          <pub-id pub-id-type="doi">10.3390/bdcc7020063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Flores Amaro</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Toward using Twitter for tracking COVID-19: a natural language processing pipeline and exploratory data set</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>01</month>
          <day>22</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e25314</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e25314/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25314</pub-id>
          <pub-id pub-id-type="medline">33449904</pub-id>
          <pub-id pub-id-type="pii">v23i1e25314</pub-id>
          <pub-id pub-id-type="pmcid">PMC7834613</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sukhavasi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Misra</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kaulgud</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Podder</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Geo-sentiment trends analysis of tweets in context of economy and employment during COVID-19</article-title>
          <source>J Comput Soc Sci</source>
          <year>2023</year>
          <month>03</month>
          <day>23</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37363804"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s42001-023-00201-2</pub-id>
          <pub-id pub-id-type="medline">37363804</pub-id>
          <pub-id pub-id-type="pii">201</pub-id>
          <pub-id pub-id-type="pmcid">PMC10035975</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forati</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Ghose</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Geospatial analysis of misinformation in COVID-19 related tweets</article-title>
          <source>Appl Geogr</source>
          <year>2021</year>
          <month>08</month>
          <volume>133</volume>
          <fpage>102473</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34103772"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.apgeog.2021.102473</pub-id>
          <pub-id pub-id-type="medline">34103772</pub-id>
          <pub-id pub-id-type="pii">S0143-6228(21)00089-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC8176902</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cuomo</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Purushothaman</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
          </person-group>
          <article-title>A longitudinal and geospatial analysis of COVID-19 tweets during the early outbreak period in the United States</article-title>
          <source>BMC Public Health</source>
          <year>2021</year>
          <month>04</month>
          <day>24</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>793</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-021-10827-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-021-10827-4</pub-id>
          <pub-id pub-id-type="medline">33894745</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-021-10827-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC8067788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Bodovski</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Global Tweet mentions of COVID-19</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online August 13, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2108.06385"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2108.06385</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guntuku</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Sherman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stokes</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Seltzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
          </person-group>
          <article-title>Tracking mental health and symptom mentions on Twitter during COVID-19</article-title>
          <source>J Gen Intern Med</source>
          <year>2020</year>
          <month>09</month>
          <day>07</day>
          <volume>35</volume>
          <issue>9</issue>
          <fpage>2798</fpage>
          <lpage>800</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32638321"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-020-05988-8</pub-id>
          <pub-id pub-id-type="medline">32638321</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-020-05988-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC7340749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>An interactive web-based dashboard to track COVID-19 in real time</article-title>
          <source>Lancet Infect Dis</source>
          <year>2020</year>
          <month>05</month>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>533</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32087114"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S1473-3099(20)30120-1</pub-id>
          <pub-id pub-id-type="medline">32087114</pub-id>
          <pub-id pub-id-type="pii">S1473-3099(20)30120-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7159018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wissel</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Van Camp</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kouril</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Glauser</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>IS</given-names>
            </name>
            <name name-style="western">
              <surname>Dexheimer</surname>
              <given-names>JW</given-names>
            </name>
          </person-group>
          <article-title>An interactive online dashboard for tracking COVID-19 in U.S. counties, cities, and states in real time</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>07</month>
          <day>01</day>
          <volume>27</volume>
          <issue>7</issue>
          <fpage>1121</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32333753"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa071</pub-id>
          <pub-id pub-id-type="medline">32333753</pub-id>
          <pub-id pub-id-type="pii">5825284</pub-id>
          <pub-id pub-id-type="pmcid">PMC7188179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zohner</surname>
              <given-names>YE</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>COVID-TRACK: world and USA SARS-COV-2 testing and COVID-19 tracking</article-title>
          <source>BioData Min</source>
          <year>2021</year>
          <month>01</month>
          <day>20</day>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>4</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://biodatamining.biomedcentral.com/articles/10.1186/s13040-021-00233-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13040-021-00233-2</pub-id>
          <pub-id pub-id-type="medline">33472672</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13040-021-00233-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7816158</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guedes</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Oliveira</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Tavares</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Luna-Muschi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lazari</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Montal</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>de Faria</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Maia</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Barboza</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Leme</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Tomazini</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Reinfection rate in a cohort of healthcare workers over 2 years of the COVID-19 pandemic</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <month>01</month>
          <day>13</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>712</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-022-25908-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-022-25908-6</pub-id>
          <pub-id pub-id-type="medline">36639411</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-022-25908-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC9837751</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Flacco</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Acuti Martellucci</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Soldato</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Carota</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Fazii</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Caponetti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Manzoli</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Rate of reinfections after SARS-CoV-2 primary infection in the population of an Italian province: a cohort study</article-title>
          <source>J Public Health (Oxf)</source>
          <year>2022</year>
          <month>12</month>
          <day>01</day>
          <volume>44</volume>
          <issue>4</issue>
          <fpage>e475</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34492110"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/pubmed/fdab346</pub-id>
          <pub-id pub-id-type="medline">34492110</pub-id>
          <pub-id pub-id-type="pii">6366077</pub-id>
          <pub-id pub-id-type="pmcid">PMC8522392</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rivelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fitzpatrick</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Blair</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Copeland</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Incidence of COVID-19 reinfection among Midwestern healthcare employees</article-title>
          <source>PLoS One</source>
          <year>2022</year>
          <month>01</month>
          <day>04</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>e0262164</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0262164"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0262164</pub-id>
          <pub-id pub-id-type="medline">34982800</pub-id>
          <pub-id pub-id-type="pii">PONE-D-21-26814</pub-id>
          <pub-id pub-id-type="pmcid">PMC8726474</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Vermund</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>HZ</given-names>
            </name>
          </person-group>
          <article-title>Risk of SARS-CoV-2 reinfection: a systematic review and meta-analysis</article-title>
          <source>Sci Rep</source>
          <year>2022</year>
          <month>12</month>
          <day>01</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>20763</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-022-24220-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-022-24220-7</pub-id>
          <pub-id pub-id-type="medline">36456577</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-022-24220-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC9714387</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Reinfection in patients with COVID-19: a systematic review</article-title>
          <source>Glob Health Res Policy</source>
          <year>2022</year>
          <month>04</month>
          <day>29</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>12</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ghrp.biomedcentral.com/articles/10.1186/s41256-022-00245-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s41256-022-00245-3</pub-id>
          <pub-id pub-id-type="medline">35488305</pub-id>
          <pub-id pub-id-type="pii">10.1186/s41256-022-00245-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC9051013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A follow-up study shows that recovered patients with re-positive PCR test in Wuhan may not be infectious</article-title>
          <source>BMC Med</source>
          <year>2021</year>
          <month>03</month>
          <day>15</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>77</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-021-01954-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12916-021-01954-1</pub-id>
          <pub-id pub-id-type="medline">33715626</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12916-021-01954-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7956402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O Murchu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Byrne</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Carty</surname>
              <given-names>PG</given-names>
            </name>
            <name name-style="western">
              <surname>De Gascun</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Keogan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>O'Neill</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Harrington</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Quantifying the risk of SARS-CoV-2 reinfection over time</article-title>
          <source>Rev Med Virol</source>
          <year>2022</year>
          <month>01</month>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>e2260</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34043841"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/rmv.2260</pub-id>
          <pub-id pub-id-type="medline">34043841</pub-id>
          <pub-id pub-id-type="pmcid">PMC8209951</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oyebode</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Ndulue</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mulchandani</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Suruliraj</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Adib</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Orji</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Milios</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Matwin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Orji</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 pandemic: identifying key issues using social media and natural language processing</article-title>
          <source>J Healthc Inform Res</source>
          <year>2022</year>
          <month>06</month>
          <day>11</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>174</fpage>
          <lpage>207</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35194569"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s41666-021-00111-w</pub-id>
          <pub-id pub-id-type="medline">35194569</pub-id>
          <pub-id pub-id-type="pii">111</pub-id>
          <pub-id pub-id-type="pmcid">PMC8853170</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Altmann</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Whettlock</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Arachchillage</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Boyton</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>The immunology of long COVID</article-title>
          <source>Nat Rev Immunol</source>
          <year>2023</year>
          <month>10</month>
          <day>11</day>
          <volume>23</volume>
          <issue>10</issue>
          <fpage>618</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1038/s41577-023-00904-7</pub-id>
          <pub-id pub-id-type="medline">37433988</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41577-023-00904-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Aly</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>McCorkell</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Soares</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wulf-Hanson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Iwasaki</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Long COVID science, research and policy</article-title>
          <source>Nat Med</source>
          <year>2024</year>
          <month>08</month>
          <day>09</day>
          <volume>30</volume>
          <issue>8</issue>
          <fpage>2148</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-03173-6</pub-id>
          <pub-id pub-id-type="medline">39122965</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-03173-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bourmistrova</surname>
              <given-names>NW</given-names>
            </name>
            <name name-style="western">
              <surname>Solomon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Braude</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Strawbridge</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Long-term effects of COVID-19 on mental health: a systematic review</article-title>
          <source>J Affect Disord</source>
          <year>2022</year>
          <month>02</month>
          <day>15</day>
          <volume>299</volume>
          <fpage>118</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34798148"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jad.2021.11.031</pub-id>
          <pub-id pub-id-type="medline">34798148</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(21)01253-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC8758130</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Abbas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Draghici</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Negulescu</surname>
              <given-names>OH</given-names>
            </name>
            <name name-style="western">
              <surname>Ain</surname>
              <given-names>NU</given-names>
            </name>
          </person-group>
          <article-title>Social media application as a new paradigm for business communication: the role of COVID-19 knowledge, social distancing, and preventive attitudes</article-title>
          <source>Front Psychol</source>
          <year>2022</year>
          <volume>13</volume>
          <fpage>903082</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35664180"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2022.903082</pub-id>
          <pub-id pub-id-type="medline">35664180</pub-id>
          <pub-id pub-id-type="pmcid">PMC9160995</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Chopra</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>BB</given-names>
            </name>
            <name name-style="western">
              <surname>Peraković</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Abd El-Latif</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Role of social media in the COVID-19 pandemic: a literature review</article-title>
          <source>Data Mining Approaches for Big Data and Sentiment Analysis in Social Media</source>
          <year>2022</year>
          <publisher-loc>Hershey, PA</publisher-loc>
          <publisher-name>IGI Global</publisher-name>
          <fpage>21</fpage>
          <lpage>46</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Health natural language processing: methodology development and applications</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>10</month>
          <day>21</day>
          <volume>9</volume>
          <issue>10</issue>
          <fpage>e23898</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/10/e23898/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23898</pub-id>
          <pub-id pub-id-type="medline">34673533</pub-id>
          <pub-id pub-id-type="pii">v9i10e23898</pub-id>
          <pub-id pub-id-type="pmcid">PMC8569540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koleck</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Dreisbach</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bourne</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Bakken</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of symptoms documented in free-text narratives of electronic health records: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>04</month>
          <day>01</day>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>364</fpage>
          <lpage>79</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30726935"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy173</pub-id>
          <pub-id pub-id-type="medline">30726935</pub-id>
          <pub-id pub-id-type="pii">5307912</pub-id>
          <pub-id pub-id-type="pmcid">PMC6657282</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McMaster</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liew</surname>
              <given-names>DF</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Frauman</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Pires</surname>
              <given-names>DE</given-names>
            </name>
          </person-group>
          <article-title>Developing a deep learning natural language processing algorithm for automated reporting of adverse drug reactions</article-title>
          <source>J Biomed Inform</source>
          <year>2023</year>
          <month>01</month>
          <volume>137</volume>
          <fpage>104265</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(22)00270-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2022.104265</pub-id>
          <pub-id pub-id-type="medline">36464227</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(22)00270-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>What can natural language processing do for clinical decision support?</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>10</month>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>760</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(09)00108-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2009.08.007</pub-id>
          <pub-id pub-id-type="medline">19683066</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(09)00108-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC2757540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Patzer</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Pitts</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Patzer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schrager</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Prediction of emergency department hospital admission based on natural language processing and neural networks</article-title>
          <source>Methods Inf Med</source>
          <year>2017</year>
          <month>10</month>
          <day>26</day>
          <volume>56</volume>
          <issue>5</issue>
          <fpage>377</fpage>
          <lpage>89</lpage>
          <pub-id pub-id-type="doi">10.3414/ME17-01-0024</pub-id>
          <pub-id pub-id-type="medline">28816338</pub-id>
          <pub-id pub-id-type="pii">17-01-0024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online October 11, 2018</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1810.04805"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1810.04805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elbattah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Arnaud</surname>
              <given-names>É</given-names>
            </name>
            <name name-style="western">
              <surname>Gignon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dequen</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The role of text analytics in healthcare: a review of recent developments and applications</article-title>
          <source>Proceedings of the 14th International Joint Conference on Biomedical Engineering Systems and Technologies</source>
          <year>2021</year>
          <conf-name>BIOSTEC '21</conf-name>
          <conf-date>February 11-13, 2021</conf-date>
          <conf-loc>Virtual Event</conf-loc>
          <fpage>825</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.scitepress.org/Link.aspx?doi=10.5220/0010414508250832"/>
          </comment>
          <pub-id pub-id-type="doi">10.5220/0010414508250832</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Moslmi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gallofre Ocana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Opdahl</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Veres</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Named entity extraction for knowledge graphs: a literature overview</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>32862</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/abstract/document/8999622"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/ACCESS.2020.2973928</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <article-title>Uncover the connection, explore COVID-19 cases through tweets</article-title>
          <source>Covlab</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://covlab.tech">https://covlab.tech</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Falotico</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Quatto</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Fleiss’ kappa statistic without paradoxes</article-title>
          <source>Qual Quant</source>
          <year>2014</year>
          <month>02</month>
          <day>13</day>
          <volume>49</volume>
          <issue>2</issue>
          <fpage>463</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.1007/S11135-014-0003-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sparck Jones</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A statistical interpretation of term specificity and its application in retrieval</article-title>
          <source>J Doc</source>
          <year>1972</year>
          <month>01</month>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1108/EB026526</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Domingos</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pazzani</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>On the optimality of the simple Bayesian classifier under zero-one loss</article-title>
          <source>Mach Learn</source>
          <year>1997</year>
          <volume>29</volume>
          <issue>2</issue>
          <fpage>103</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1007413511361</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hearst</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Dumais</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Osuna</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Platt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Scholkopf</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Support vector machines</article-title>
          <source>IEEE Intell Syst Their Appl</source>
          <year>1998</year>
          <month>07</month>
          <day>10</day>
          <volume>13</volume>
          <issue>4</issue>
          <fpage>18</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1109/5254.708428</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hosmer Jr</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Lemeshow</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sturdivant</surname>
              <given-names>RX</given-names>
            </name>
          </person-group>
          <source>Applied Logistic Regression</source>
          <year>2013</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &#38; Sons</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: a robustly optimized BERT pretraining approach</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online July 26, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1907.11692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carbonell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
          </person-group>
          <article-title>XLNet: generalized autoregressive pretraining for language understanding</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online June 19, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1906.08237"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Child</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Luan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Language models are unsupervised multitask learners</article-title>
          <source>OpenAI blog</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf">https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>BigScience Workshop</collab>
            <name name-style="western">
              <surname>Scao</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Akiki</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlick</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ilić</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hesslow</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yvon</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>BLOOM: a 176B-parameter open-access multilingual language model</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online June 27, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2211.05100"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Touvron</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Albert</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Almahairi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Babaei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bashlykov</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Scialom</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bikel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Llama 2: open foundation and fine-tuned chat models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online July 18, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2307.09288"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2307.09288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Label supervised LLaMA finetuning</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online October 2, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2310.01208"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2310.01208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wallis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Allen-Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>LoRA: low-rank adaptation of large language models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online June 17, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2106.09685"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2106.09685</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loshchilov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Hutter</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Decoupled weight decay regularization</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online November 14, 2017</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1711.05101"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1711.05101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A survey on deep learning for named entity recognition</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2022</year>
          <month>1</month>
          <day>1</day>
          <volume>34</volume>
          <issue>1</issue>
          <fpage>50</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.1109/TKDE.2020.2981314</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bhatia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celikkaya</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Khalilia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Senthivel</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Comprehend medical: a named entity recognition and relationship extraction web service</article-title>
          <source>Proceedings of the 18th IEEE International Conference On Machine Learning And Applications</source>
          <year>2019</year>
          <conf-name>ICMLA '19</conf-name>
          <conf-date>December 16-19, 2019</conf-date>
          <conf-loc>Boca Raton, FL</conf-loc>
          <fpage>1844</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/8999113"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/icmla.2019.00297</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmitt</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kubler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Robert</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Papadakis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Le Traon</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A replicable comparison study of NER software: StanfordNLP, NLTK, OpenNLP, SpaCy, Gate</article-title>
          <source>Proceedings of the 6th International Conference on Social Networks Analysis, Management and Security</source>
          <year>2019</year>
          <conf-name>SNAMS '19</conf-name>
          <conf-date>October 22-25, 2019</conf-date>
          <conf-loc>Granada, Spain</conf-loc>
          <fpage>338</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/8931850"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/snams.2019.8931850</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mostafa</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The use of SNOMED CT, 2013-2020: a literature review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>08</month>
          <day>13</day>
          <volume>28</volume>
          <issue>9</issue>
          <fpage>2017</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34151978"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab084</pub-id>
          <pub-id pub-id-type="medline">34151978</pub-id>
          <pub-id pub-id-type="pii">6307174</pub-id>
          <pub-id pub-id-type="pmcid">PMC8363812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Long</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Shih</surname>
              <given-names>HH</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tung</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>HH</given-names>
            </name>
          </person-group>
          <article-title>The empirical mode decomposition and the Hilbert spectrum for nonlinear and non-stationary time series analysis</article-title>
          <source>Proc R Soc Lond A</source>
          <year>1998</year>
          <month>03</month>
          <day>08</day>
          <volume>454</volume>
          <issue>1971</issue>
          <fpage>903</fpage>
          <lpage>95</lpage>
          <pub-id pub-id-type="doi">10.1098/rspa.1998.0193</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benitez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gaydecki</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Zaidi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fitzpatrick</surname>
              <given-names>AP</given-names>
            </name>
          </person-group>
          <article-title>The use of the Hilbert transform in ECG signal analysis</article-title>
          <source>Comput Biol Med</source>
          <year>2001</year>
          <month>09</month>
          <volume>31</volume>
          <issue>5</issue>
          <fpage>399</fpage>
          <lpage>406</lpage>
          <pub-id pub-id-type="doi">10.1016/s0010-4825(01)00009-9</pub-id>
          <pub-id pub-id-type="medline">11535204</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(01)00009-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mushtaq</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Augmented Dickey Fuller test</article-title>
          <source>SSRN Journal</source>
          <comment>Preprint posted online August 17, 2011</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1911068"/>
          </comment>
          <pub-id pub-id-type="doi">10.2139/ssrn.1911068</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The KPSS stationarity test as a unit root test</article-title>
          <source>Econ Lett</source>
          <year>1992</year>
          <month>4</month>
          <volume>38</volume>
          <issue>4</issue>
          <fpage>387</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.1016/0165-1765(92)90023-R</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breitung</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Franses</surname>
              <given-names>PH</given-names>
            </name>
          </person-group>
          <article-title>On Phillips–Perron-type tests for seasonal unit roots</article-title>
          <source>Econom Theory</source>
          <year>1998</year>
          <month>04</month>
          <day>01</day>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>200</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1017/S0266466698142032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Long</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>TimesNet: temporal 2D-variation modeling for general time series analysis</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online October 5, 2022</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2210.02186"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2210.02186</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benesty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Benesty</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Pearson correlation coefficient</article-title>
          <source>Noise Reduction in Speech Processing</source>
          <year>2009</year>
          <publisher-loc>Berlin, Germany</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>1</fpage>
          <lpage>4</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bland</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>DG</given-names>
            </name>
          </person-group>
          <article-title>Survival probabilities (the Kaplan-Meier method)</article-title>
          <source>BMJ</source>
          <year>1998</year>
          <month>12</month>
          <day>05</day>
          <volume>317</volume>
          <issue>7172</issue>
          <fpage>1572</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/9836663"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.317.7172.1572</pub-id>
          <pub-id pub-id-type="medline">9836663</pub-id>
          <pub-id pub-id-type="pmcid">PMC1114388</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
