<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e75500</article-id><article-id pub-id-type="doi">10.2196/75500</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Intervention in Health Misinformation Using Large Language Models for Automated Detection, Thematic Analysis, and Inoculation: Case Study on COVID-19</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Malek</surname><given-names>Samira</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Griffin</surname><given-names>Christopher</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Fraleigh</surname><given-names>Robert D</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Lennon</surname><given-names>Robert</given-names></name><degrees>MD, JD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Monga</surname><given-names>Vishal</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Shen</surname><given-names>Lijiang</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Computer Science and Engineering, Pennsylvania State University</institution><addr-line>University Park</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Applied Research Laboratory, Pennsylvania State University</institution><addr-line>University Park</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Mathematics, Pennsylvania State University</institution><addr-line>University Park</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff4"><institution>PrimeCare Medical</institution><addr-line>Harrisburg</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff5"><institution>Department of Electrical Engineering, Pennsylvania State University</institution><addr-line>University Park</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff6"><institution>Department of Communication Arts and Sciences, Pennsylvania State University</institution><addr-line>211 Sparks Building</addr-line><addr-line>University Park</addr-line><addr-line>PA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name 
name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Chumachenko</surname><given-names>Dmytro</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Choi</surname><given-names>Eun Cheol</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Lijiang Shen, PhD, Department of Communication Arts and Sciences, Pennsylvania State University, 211 Sparks Building, University Park, PA, 16802, United States, 1 (814) 865-1736; <email>lus32@psu.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>8</day><month>1</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e75500</elocation-id><history><date date-type="received"><day>09</day><month>04</month><year>2025</year></date><date date-type="rev-recd"><day>12</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>13</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Samira Malek, Christopher Griffin, Robert D Fraleigh, Robert Lennon, Vishal Monga, Lijiang Shen. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 8.1.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e75500"/><abstract><sec><title>Background</title><p>The rapid growth of social media as an information channel has enabled the swift spread of inaccurate or false health information, significantly impacting public health. This widespread dissemination of misinformation has caused confusion, eroded trust in health authorities, led to noncompliance with health guidelines, and encouraged risky health behaviors. 
Understanding the dynamics of misinformation on social media is essential for devising effective public health communication strategies.</p></sec><sec><title>Objective</title><p>This study aims to present a comprehensive and automated approach that leverages large language models (LLMs) and machine learning techniques to detect misinformation on social media, uncover the underlying causes and themes, and generate refutation arguments, facilitating control of its spread and promoting public health outcomes by inoculating people against health misinformation.</p></sec><sec sec-type="methods"><title>Methods</title><p>We use 2 datasets to train 3 LLMs, namely, BERT, T5, and GPT-2, to classify documents into 2 categories: misinformation and nonmisinformation. In addition, we use a separate dataset to identify misinformation topics. To analyze these topics, we applied 3 topic modeling algorithms&#x2014;Latent Dirichlet Allocation, Top2Vec, and BERTopic&#x2014;and selected the optimal model based on performance evaluated across 3 metrics. Using a prompting approach, we extract sentence-level representations for the topics to uncover their underlying themes. Finally, we design a prompt text capable of identifying misinformation themes effectively.</p></sec><sec sec-type="results"><title>Results</title><p>The trained BERT model demonstrated exceptional performance, achieving 98% accuracy in classifying misinformation and nonmisinformation, with a 44% reduction in false-positive rates for artificial intelligence&#x2013;generated misinformation. Among the 3 topic modeling approaches used, BERTopic outperformed the others, achieving the highest metrics with a Coherence Value of 0.41, Normalized Pointwise Mutual Information of &#x2212;0.086, and Inverse Rank-Biased Overlap of 0.99. To address the issue of unclassified documents, we developed an algorithm to assign each document to its closest topic. 
In addition, we proposed a novel method using prompt engineering to generate sentence-level representations for each topic, achieving a 99.6% approval rate as &#x201C;appropriate&#x201D; or &#x201C;somewhat appropriate&#x201D; by 3 independent raters. We further designed a prompt text to identify themes of misinformation topics and developed another prompt capable of detecting misinformation themes with 82% accuracy.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study presents a comprehensive and automated approach to addressing health misinformation on social media using advanced machine learning and natural language processing techniques. By leveraging LLMs and prompt engineering, the system effectively detects misinformation, identifies underlying themes, and provides explanatory responses to combat its spread. The proposed method was tested on an English language COVID-19&#x2013;related dataset and has not been evaluated on real-world online social media data; the experiments were conducted offline.</p></sec></abstract><kwd-group><kwd>large language models</kwd><kwd>topic modeling</kwd><kwd>COVID-19</kwd><kwd>misinformation</kwd><kwd>prompt engineering</kwd><kwd>machine learning</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Misinformation and inaccurate beliefs and knowledge about health can substantially undermine well-being by fueling confusion, eroding trust in reliable medical advice, and prompting risky behaviors such as rejecting vaccines, turning to scientifically unproven home remedies, or neglecting protective measures amid clear dangers [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. 
These inaccuracies often circulate rapidly via social media, exploiting emotional narratives that overshadow fact-based content and leading individuals to question the legitimacy of evidence-based interventions [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Repeated exposure to misinformation reduces health literacy and can reinforce people&#x2019;s belief in falsehoods, making them more likely to view credible health authorities with skepticism [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. As a result, misinformation weakens the success of prevention and treatment strategies, paving the way for heightened disease transmission, avoidable complications, and deteriorating outcomes at both individual and community levels [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>An illustration comes from the COVID-19 pandemic, which saw an unprecedented surge of misinformation and conspiracy theories&#x2014;labeled an &#x201C;infodemic&#x201D; by the World Health Organization (WHO) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. False remedies, unverified claims on the origins of the virus, and politicized narratives about preventive measures severely hampered containment efforts [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. While proven strategies such as mask wearing, vaccination, and physical distancing were promoted by scientific authorities, social media rumors cast doubt on vaccine safety and the reality of the virus itself, discouraging people from getting vaccinated or seeking appropriate medical care [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. 
This breakdown in adherence prolonged outbreaks, overloaded health infrastructures, and ultimately jeopardized global health and economic stability [<xref ref-type="bibr" rid="ref25">25</xref>].</p><p>A parallel can be drawn from discussions around the human papillomavirus (HPV) vaccine, which has proven crucial in preventing various HPV-related cancers, including cervical cancer that claims thousands of lives each year [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. Widespread misinformation about adverse effects and conspiracies regarding its necessity led to a significant portion of unvaccinated adolescents, heightening the likelihood of HPV infection and future malignancies [<xref ref-type="bibr" rid="ref30">30</xref>]. This trend not only increased the burden on public health systems but also underscored the power of misinformation to undermine trust in legitimate medical counsel.</p><p>In recent years, social media has become a central and highly accessible source of information for millions of users worldwide [<xref ref-type="bibr" rid="ref31">31</xref>]. However, its ability to rapidly disseminate content&#x2014;including unfounded claims&#x2014;creates fertile ground for large-scale propagation of misinformation. Given the sheer volume of posts, manual monitoring and analysis of such content are impractical [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Consequently, developing and using automated, data-driven methods to understand and manage the dynamics of digital misinformation are essential for preserving accurate information and safeguarding public trust.</p><p>In this study, we propose an automated system designed to identify whether a given text contains misinformation. 
If misinformation is detected, the system analyzes the theme of the misinformation and provides a refutation argument (inoculation) to help prevent its spread on social media and enhance public health awareness. To achieve this, we leverage a large language model (LLM) to detect misinformation effectively. Furthermore, we demonstrate that enriching datasets significantly improves the detection of misinformation generated by both humans and artificial intelligence (AI). Recent advances in LLMs, such as ChatGPT, have enabled the generation of increasingly sophisticated misinformation, which poses challenges for traditional machine learning (ML) methods in distinguishing AI-generated misinformation [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. While prior research has highlighted the effectiveness of deep learning methods in classifying health-related misinformation, these efforts have predominantly focused on content generated by humans [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Moreover, our proposed process generates sentence-level descriptions of misinformation topics, eliminating the need for manual interpretation. In contrast, prior approaches relied on ML-based methods that produced word-level topic representations, which required manual interpretation to form coherent sentence-level topics&#x2014;introducing potential human errors and subjective biases [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Similar challenges arise in other ML-based applications, such as optimizing models in industries where manual calibration of parameters can lead to inefficiencies and errors. 
For example, recent research has demonstrated that data-driven models can enhance predictive accuracy and automate decision-making, reducing human intervention in systems that rely on complex data streams [<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. Inspired by these advances, our process generates sentence-level descriptions of misinformation topics, eliminating the need for manual interpretation. In addition, we introduce an algorithm to assign documents to the most relevant topics. This addresses the limitation of many ML-based topic modeling algorithms, which often leave some documents unclassified. Our process also identifies overarching themes of misinformation topics automatically, providing a high-level understanding of the underlying reasons for misinformation categorization. Although the COVID-19 pandemic serves as our illustrative case due to its scale and data availability, the underlying challenges we address&#x2014;rapid online spread, emotionally charged narratives, and declining trust&#x2014;are common across other health contexts (eg, HPV vaccines) and beyond. Our approach does not rely on COVID-specific lexicons or handcrafted rules. Instead, the proposed Misinformation Detection and Inoculation Process (MDIP) is domain-agnostic. It ingests free English text, induces topics using standard models, transforms word lists into sentence-level descriptors through targeted LLM prompting, organizes them into hierarchical themes (guided by coherence, diversity metrics, and generic embeddings), and maps themes to refutation templates. 
Each stage has the potential to be applied to other health domains and misinformation settings, provided that the AI is properly trained with the topic- or domain-specific data.</p><p>Many previous studies have focused on individual aspects of the misinformation problem, such as detection or topic analysis [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], but have not integrated these steps into a unified framework for intervention. Our MDIP and Misinformation Detection and Inoculation System (MDIS) frameworks unify misinformation detection, topic modeling, thematic refutation, and public health intervention into a single, automated workflow. This end-to-end approach enables health teams to move beyond merely identifying misinformation to actively and effectively countering it.</p><p>In the study by He et al [<xref ref-type="bibr" rid="ref42">42</xref>], a method was proposed to generate per-claim counterresponses. While generating responses tailored to each specific piece of misinformation can be more informative and persuasive, such approaches require paired datasets of misinformation posts and response arguments for model training&#x2014;datasets that are difficult to construct. Moreover, misinformation often mutates through paraphrasing and subtle edits; claim-specific pipelines are fragile in the face of such variation [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. By contrast, our theme-level refutation approach is robust to these surface changes. More recently, LLM-based topic modeling approaches, such as TopicGPT [<xref ref-type="bibr" rid="ref45">45</xref>] and other methods [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], have leveraged the capabilities of powerful pretrained models such as ChatGPT and LLaMA. 
While these methods benefit from the models&#x2019; deep understanding of language, they often require passing entire documents through parameter-rich models, which leads to increased latency and computational costs compared with traditional pipelines. Furthermore, they typically yield only single-level sets of word topics. In contrast, our hybrid framework balances efficiency and expressiveness: we first use a traditional topic inducer to efficiently uncover the underlying structure and then apply LLM prompting (via ChatGPT) where it provides the added value. Specifically, the LLM is used to transform word lists into sentence-level topic labels and to organize topics into hierarchical themes. This targeted use of LLMs preserves computational efficiency while producing richer, hierarchical, and deployment-ready representations that are well suited for downstream tasks such as detection, monitoring, and refutation.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>In this study, we propose the MDIP, a comprehensive framework designed to analyze the dynamics of misinformation automatically and develop an MDIS. The MDIS end-to-end pipeline (1) flags misinformation, (2) explains what it is about via topics and higher-level themes, and (3) returns a concise, theme-matched refutation. The components are modular and feed one another in a simple data flow. No step in MDIP and MDIS uses disease-specific features; inputs are raw text and model hyperparameters chosen by intrinsic criteria (topic coherence and diversity). This makes the pipeline directly applicable to other misinformation corpora after swapping in the relevant documents. 
The MDIP framework is structured into four interconnected sections, each addressing a critical aspect of misinformation management:</p><list list-type="order"><list-item><p><italic>Detect misinformation</italic>: Collect a labeled dataset and then train LLMs to classify text documents as either misinformation or nonmisinformation, providing a foundation for identifying false narratives.</p></list-item><list-item><p><italic>Misinformation topics</italic>: Here, a topic modeling algorithm is applied to uncover the key topics within misinformation datasets. This step helps categorize misinformation into specific subject areas, enabling a better understanding of its thematic structure.</p></list-item><list-item><p><italic>Topic descriptions</italic>: This section uses prompt engineering and the results of the previous section to enhance interpretability by generating sentence-level representations for each topic, moving beyond traditional word-level outputs. These descriptive summaries provide meaningful context for understanding the essence of each topic.</p></list-item><list-item><p><italic>Provide refutation</italic>: In the final step, topic descriptions and extracted themes are used to design a specific prompt that identifies the underlying themes of misinformation. The system then generates clear and contextually relevant refutation arguments tailored to the detected misinformation themes. These arguments are designed to counter false narratives, improve public understanding, and mitigate the spread of misinformation. The refutations are a key component in the psychological inoculation-based misinformation mitigation intervention. 
As a metaphor for medical vaccination, the typical inoculation strategy consists of an attack message (ie, as a small weakened or deactivated dose of the virus) and refutation or counterarguments against the attack message (ie, as the immune system&#x2019;s reaction to the vaccine when it is injected or otherwise enters the human body) [<xref ref-type="bibr" rid="ref48">48</xref>].</p></list-item></list><p>By integrating these components, MDIP enables the development of MDIS, an intelligent and automated system capable of detecting misinformation, identifying its themes, and delivering refutations to combat its impact on public health. The overall architecture of the proposed framework is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>. <xref ref-type="fig" rid="figure1">Figure 1A</xref> outlines the 4 stages of the MDIP: supervised detection of misinformation, topic modeling, generation of interpretable topic descriptions, and theme-based refutation. <xref ref-type="fig" rid="figure1">Figure 1B</xref> presents the end-to-end workflow of the MDIS, which processes new text inputs to produce a misinformation classification, assign the text to a thematic category, and generate a matched refutation. Finally, <xref ref-type="fig" rid="figure1">Figure 1C</xref> provides an example of the user-facing output, demonstrating how the system delivers both a misinformation warning and a concise, contextually relevant refutation.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>(<bold>A</bold>) Overview of the Misinformation Detection and Inoculation Process (MDIP), which integrates four main stages: (1) misinformation detection through supervised classification, (2) topic modeling of misinformation texts and assignment of outliers, (3) generation of interpretable topic descriptions and aggregation into higher-level themes, and (4) theme detection and provision of theme-linked refutations via prompt engineering for detected misinformation. 
(<bold>B</bold>) Misinformation Detection and Inoculation System (MDIS) workflow: For any new text, the system outputs (1) the misinformation decision, (2) its most likely theme, and (3) a theme-matched refutation. (<bold>C</bold>) Example: Illustration of how the system delivers the final user-facing output, providing a warning about misinformation and the matched refutation text. LLM: large language model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e75500_fig01.png"/></fig></sec><sec id="s2-2"><title>Detect Misinformation</title><p>Misinformation detection in text documents has become a critical area of research due to the growing prevalence of misleading or false information online. To address this challenge, we use classifiers based on LLMs. These LLMs are trained to categorize text into 2 classes: misinformation and nonmisinformation, as shown in the first part of <xref ref-type="fig" rid="figure1">Figure 1A</xref>.</p><p>Our classifier was trained using 2 complementary datasets, each providing diverse linguistic characteristics to enhance performance in detecting misinformation. The first dataset, the AAAI 2021 Competition Dataset [<xref ref-type="bibr" rid="ref49">49</xref>], consists of misinformation sourced from social media platforms such as Facebook and X (formerly known as Twitter). This dataset reflects the informal, conversational style of social media, characterized by casual tone, nonstandard grammar, and the use of slang. The second dataset, COVID_19FNIR [<xref ref-type="bibr" rid="ref50">50</xref>], includes misinformation presented in formal, structured language, offering a stark contrast to the informal nature of the first dataset. By incorporating these 2 datasets, we trained 3 different LLMs to detect misinformation effectively across a wide spectrum of communication styles. 
The blend of informal and formal language enabled the model to better generalize, achieving improved accuracy and robustness in identifying misinformation, whether generated by humans or artificial intelligence (AI).</p><p>Traditionally, researchers collect human-written data from social media platforms such as Twitter and Facebook, label them as misinformation or nonmisinformation, and then train a deep neural network to classify such documents [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. However, recent studies have demonstrated that deep neural networks trained exclusively on human-written datasets exhibit weaker accuracy in detecting AI-generated misinformation compared with human-written misinformation. This discrepancy arises because AI-generated misinformation often adopts formal language styles similar to accurate information shared by credible sources such as the WHO and the Centers for Disease Control and Prevention (CDC) on official social media accounts [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>In this research, by combining a dataset with formal language and another with informal language (enriching the dataset with different language types and more misinformation), we demonstrate that LLMs achieve reasonable accuracy in detecting AI-generated misinformation. This approach ensures better generalization and robustness, bridging the gap in identifying misinformation across diverse linguistic styles.</p></sec><sec id="s2-3"><title>Misinformation Topics</title><p>As outlined in the second section of <xref ref-type="fig" rid="figure1">Figure 1A</xref>, our approach involves 3 key steps. First, we collect misinformation data. Next, we select and compare topic modeling algorithms based on specific features and metrics to identify the most effective model. 
Finally, we design an algorithm that assigns topics to new or unclassified documents.</p><p>To identify misinformation topics, we used one of the largest datasets of verified COVID-19 claims, the IFCN dataset, which has been extensively used in related research [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]. We applied 3 topic modeling algorithms&#x2014;Latent Dirichlet Allocation (LDA) [<xref ref-type="bibr" rid="ref55">55</xref>], Top2Vec [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>], and BERTopic [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]&#x2014;to analyze this dataset.</p><p>To evaluate and compare the performance of these algorithms, we selected 3 metrics: Coherence Value (CV) [<xref ref-type="bibr" rid="ref59">59</xref>], Normalized Pointwise Mutual Information (NPMI) [<xref ref-type="bibr" rid="ref59">59</xref>], and Inverse Rank-Biased Overlap (IRBO) [<xref ref-type="bibr" rid="ref60">60</xref>]. CV and NPMI measure the coherence of the topics, ensuring that they are logically consistent, human interpretable, and meaningful. IRBO, on the other hand, evaluates the diversity of the topics generated by the model, which is crucial for ensuring broad coverage of the dataset&#x2019;s content. Since our focus is on misinformation within health-related social media data, coherence and diversity are particularly important to ensure that topics are both interpretable and representative.</p><p>After selecting the best-performing topic model, we developed an algorithm to address the issue of unclassified documents. 
This algorithm assigns topics to new or previously unassigned documents, ensuring comprehensive topic coverage and improved usability of the model for real-world applications.</p></sec><sec id="s2-4"><title>Topic Description</title><p>Topic modeling algorithms typically produce word-level representations for each topic. While these representations provide insight into the most relevant words associated with a topic, they often lack the semantic depth necessary to precisely identify the specific topic within a document. This limitation arises because word-level outputs fail to capture the context and relationships between words that define the overarching theme of a topic [<xref ref-type="bibr" rid="ref51">51</xref>].</p><p>Recent advancements in LLMs have demonstrated their ability to generate high-quality, contextually relevant outputs with minimal or zero additional training by designing carefully crafted inputs&#x2014;referred to as prompt engineering [<xref ref-type="bibr" rid="ref61">61</xref>]. Leveraging this capability, we address the limitations of word-level representations by using prompt engineering techniques to generate sentence-level representations for each topic. These sentence-level representations capture the context and essence of the topic, enabling a more accurate and interpretable understanding of the document content.</p><p>Subsequently, these sentence-level representations are used to identify and articulate the overarching themes of the topics, also at the sentence level. This approach provides a more comprehensive view of the thematic structure within the document corpus. Finally, recognizing that all documents within the dataset share a common underlying reason for being classified as misinformation, we develop a tailored response list for each topic theme. 
The third section of <xref ref-type="fig" rid="figure1">Figure 1A</xref> illustrates these 3 steps.</p></sec><sec id="s2-5"><title>Provide Refutation</title><p>In the final step of our proposed method, as illustrated in the final part of <xref ref-type="fig" rid="figure1">Figure 1A</xref>, we identify the overarching theme of misinformation and provide a corresponding response from a preconstructed response list. This response list is developed in the preceding step based on the identified themes.</p><p>To determine the themes of misinformation, we use prompt engineering techniques. By designing carefully crafted and contextually appropriate prompt text, we effectively extract the underlying themes associated with misinformation. This approach allows us to translate complex word-level or sentence-level representations into meaningful thematic insights.</p><p>By identifying misinformation themes and providing precise, theme-based responses, our method aims to enhance public health knowledge and reduce the spread of misinformation. This proactive approach not only mitigates the risks associated with false or misleading information but also fosters a more informed and resilient society.</p></sec><sec id="s2-6"><title>Proposed System</title><p>Following the completion of four foundational steps&#x2014;(1) detecting misinformation, (2) identifying misinformation topics, (3) describing topics, and (4) providing refutations&#x2014;we develop our comprehensive MDIS, which consists of three key components.</p><list list-type="order"><list-item><p><italic>Detection of misinformation</italic>: The system begins by determining whether a given document is misinformation.</p></list-item><list-item><p><italic>Identification of misinformation themes</italic>: If the document is classified as misinformation, the system analyzes its content to identify the underlying misinformation themes. 
This process involves extracting thematic representations that provide a clearer understanding of the document&#x2019;s misleading aspects.</p></list-item><list-item><p><italic>Providing refutations</italic>: Finally, the system generates a detailed refutation argument for the identified misinformation themes. These arguments are derived from a predesigned response list tailored to address specific misinformation themes effectively.</p></list-item></list><p>All 3 components of the system are demonstrated with a practical example, as illustrated in <xref ref-type="fig" rid="figure1">Figure 1B and C</xref>. This example highlights how the system operates cohesively to detect misinformation, uncover its thematic structure, and deliver accurate refutations, ultimately contributing to a more informed and resilient public.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study did not involve human participants, human tissue, or the collection of identifiable private information by the authors. All analyses were conducted on previously collected, publicly available, and deidentified datasets, obtained solely for research purposes. 
Specifically, the data sources include: (1) the AAAI 2021 COVID-19 Fake News Detection Competition dataset, originally released as part of the AAAI Conference on Artificial Intelligence shared task, in which all social media content was anonymized and distributed for noncommercial research use only [<xref ref-type="bibr" rid="ref49">49</xref>]; (2) the COVID-19 FNIR (Fake News and Information Reliability) dataset, introduced by prior studies for misinformation detection research and released in deidentified form for academic use [<xref ref-type="bibr" rid="ref50">50</xref>]; and (3) the International Fact-Checking Network (IFCN) COVID-19 fact-checking corpus, which aggregates publicly available fact-check articles produced by IFCN-certified organizations and contains no personal or sensitive individual-level data [<xref ref-type="bibr" rid="ref62">62</xref>]. According to the US Department of Health and Human Services Common Rule (45 CFR &#x00A7;46.104(d)), secondary research involving publicly available, deidentified data does not constitute human subjects research and is therefore exempt from Institutional Review Board review [<xref ref-type="bibr" rid="ref63">63</xref>]. The research complied with all relevant ethical standards and data use policies and poses no risk to individuals or communities. The study&#x2019;s sole objective is to advance computational methods for understanding and mitigating the spread of health misinformation.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Text Classification</title><p>Due to the exceptional performance of LLMs across a wide range of AI tasks, we leveraged 3 prominent LLMs to fine-tune them for COVID-19 text classification. 
These models&#x2014;BERT (Bidirectional Encoder Representations from Transformers), GPT-2 (Generative Pre-trained Transformer 2), and T5-base (Text-to-Text Transfer Transformer)&#x2014;are renowned for their ability to understand and process natural language with high accuracy and contextual awareness.</p><p>BERT is particularly effective in handling text classification tasks due to its bidirectional context understanding, which allows it to capture nuanced language patterns [<xref ref-type="bibr" rid="ref64">64</xref>]. GPT-2 excels in text generation and classification by leveraging its autoregressive architecture to predict sequences in a given context [<xref ref-type="bibr" rid="ref65">65</xref>]. Finally, T5-base operates under a unified framework that reformulates all NLP tasks as a text-to-text problem, making it versatile and effective across various domains [<xref ref-type="bibr" rid="ref66">66</xref>].</p><p>To conduct this study, we combined the AAAI 2021 competition dataset with the COVID-19 FNIR dataset. The data were split into training, testing, and validation sets with proportions of 67%, 17%, and 16%, respectively.</p><p>Accuracy, <italic>F</italic><sub>1</sub>-score, Recall, and Precision are standard metrics for evaluating classification models. Accuracy measures the proportion of all predictions that are correct, providing an overall performance indicator but sometimes masking class imbalances. Precision quantifies the fraction of predicted positives that are truly positive, reflecting how often the model avoids false alarms. Recall (or sensitivity) measures the fraction of actual positives that the model successfully identifies, highlighting its ability to capture relevant cases [<xref ref-type="bibr" rid="ref67">67</xref>]. <italic>F</italic><sub>1</sub>-score is the harmonic mean of precision and recall, balancing the trade-off between the two. 
In health-related text classification tasks such as misinformation detection, reasonable thresholds are often required due to the risks of misclassification&#x2014;for instance, aiming for Accuracy &#x003E;0.80, <italic>F</italic><sub>1</sub>-score &#x2265;0.75, Recall &#x2265;0.75, and Precision &#x2265;0.70&#x2014;to ensure both reliable detection and practical usability in downstream public health inoculation applications. <xref ref-type="table" rid="table1">Table 1</xref> shows the evaluation metrics, including Accuracy, <italic>F</italic><sub>1</sub>-score, Recall, and Precision, for all 3 models on the test dataset. Among these, BERT achieved the highest performance, with an accuracy of <italic>98%</italic> on the test data. This result highlights BERT&#x2019;s ability to handle complex linguistic structures and its effectiveness in fine-tuning for domain-specific tasks such as COVID-19 text classification.</p><p>The confusion matrices in <xref ref-type="fig" rid="figure2">Figure 2</xref> for BERT, GPT-2, and T5-base further illustrate the performance of these models, providing a detailed breakdown of true positives, true negatives, false positives, and false negatives, which helps in understanding their classification strengths and potential areas for improvement.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Performance metrics (Accuracy, <italic>F</italic><sub>1</sub>-score, Recall, and Precision) on the test dataset for 3 models: BERT-base, GPT-2, and T5-base.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">Precision</td></tr></thead><tbody><tr><td align="left" valign="top">BERT</td><td align="char" char="." valign="top">0.9848</td><td align="char" char="." 
valign="top">0.9854</td><td align="char" char="." valign="top">0.9896</td><td align="char" char="." valign="top">0.9812</td></tr><tr><td align="left" valign="top">GPT-2<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="char" char="." valign="top">0.9460</td><td align="char" char="." valign="top">0.9495</td><td align="char" char="." valign="top">0.9841</td><td align="char" char="." valign="top">0.9117</td></tr><tr><td align="left" valign="top">T5-base (Generic Condition)</td><td align="char" char="." valign="top">0.9763</td><td align="char" char="." valign="top">0.9763</td><td align="char" char="." valign="top">0.9763</td><td align="char" char="." valign="top">0.9764</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>GPT-2: Generative Pre-trained Transformer.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Confusion matrices illustrate the performance of the 3 binary classification models (BERT, GPT-2, and T5-base). GPT-2: Generative Pre-trained Transformer.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e75500_fig02.png"/></fig><p>To evaluate the accuracy of our model on AI-generated data, we used the dataset provided in the study by Du et al [<xref ref-type="bibr" rid="ref35">35</xref>]. We tested our fine-tuned BERT model on this dataset, and the results are shown in <xref ref-type="table" rid="table2">Table 2</xref>. The findings indicate a significant reduction in the number of false positives, decreasing from 27 to 15, representing a 44% improvement. 
In addition, <xref ref-type="fig" rid="figure3">Figure 3</xref> shows the confusion matrix for our fine-tuned BERT model when applied to the AI-generated misinformation dataset.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>False-positive and true-negative results obtained from testing the fine-tuned BERT model on our combined dataset, compared with the results reported in the study by Zhou et al [<xref ref-type="bibr" rid="ref33">33</xref>].</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">FP<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">TN<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Ours</td><td align="left" valign="top">15</td><td align="left" valign="top">485</td></tr><tr><td align="left" valign="top">Zhou et al [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">27</td><td align="left" valign="top">473</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>FP: false-positive.</p></fn><fn id="table2fn2"><p><sup>b</sup>TN: true-negative.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>The figure displays the confusion matrix of our fine-tuned BERT model evaluated on the artificial intelligence&#x2013;generated dataset [<xref ref-type="bibr" rid="ref3">3</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e75500_fig03.png"/></fig></sec><sec id="s3-2"><title>Topic Models</title><p>We used the IFCN dataset, one of the largest datasets on the COVID-19 pandemic, to apply and evaluate topic modeling approaches. Three models were tested: LDA, Top2Vec, and BERTopic. 
After applying each topic model, the top 10 words associated with each topic were selected, and the 3 metrics&#x2014;CV, NPMI, and IRBO&#x2014;were computed to compare the models. <xref ref-type="table" rid="table3">Table 3</xref> summarizes the results across these metrics. Among the models, BERTopic achieved the highest scores across all metrics, leading to its selection for further analysis. In practice, the reported metrics indicate that the topics are moderately coherent and interpretable but not perfectly tight. A coherence score of 0.41 means that the top words in each topic tend to appear together often enough for human analysts to assign clear labels, although some mixing of subthemes is expected. The NPMI of &#x2212;0.086 is close to neutral, which is typical for short, fragmented social media posts, and suggests that while not every word pair strongly co-occurs, the overall topics remain meaningful. Combined with the high IRBO score (0.99), these results imply that the model generates a broad, nonredundant set of topics that cover diverse misinformation themes while remaining practically usable for labeling, interpretation, and downstream health communication tasks. 
Since in the health-related domain, it is important to cover as many diverse topics as possible, an IRBO value above 0.7 can be considered acceptable, while a CV value equal to or above 0.4 and an NPMI close to 0 or higher are reasonable.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Performance of 3 topic modeling approaches&#x2014;LDA, Top2Vec, and BERTopic&#x2014;evaluated across 3 metrics: Coherence Value, Normalized Pointwise Mutual Information, and Inverse Rank-Biased Overlap<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">CV<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="bottom">NPMI<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="bottom">IRBO<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">LDA</td><td align="left" valign="top">0.39</td><td align="left" valign="top">&#x2212;0.35</td><td align="left" valign="top">0.96</td></tr><tr><td align="left" valign="top">Top2Vec</td><td align="left" valign="top">0.35</td><td align="left" valign="top">&#x2212;0.29</td><td align="left" valign="top">0.89</td></tr><tr><td align="left" valign="top">BERTopic</td><td align="left" valign="top">0.41</td><td align="left" valign="top">&#x2212;0.086</td><td align="left" valign="top">0.99</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>For all metrics, higher values indicate better performance.</p></fn><fn id="table3fn2"><p><sup>b</sup>CV: Coherence Value.</p></fn><fn id="table3fn3"><p><sup>c</sup>NPMI: Normalized Pointwise Mutual Information.</p></fn><fn id="table3fn4"><p><sup>d</sup>IRBO: Inverse Rank-Biased Overlap.</p></fn></table-wrap-foot></table-wrap><p>Many topic modeling approaches, 
including BERTopic, often encounter limitations when applied to real-world datasets, as they are unable to assign topics to all documents. This can leave a subset of documents unclassified, reducing the overall effectiveness of the model. To address this issue, we have used the algorithm in <xref ref-type="other" rid="box1">Textbox 1</xref>, a method for ensuring comprehensive topic assignment across the dataset [<xref ref-type="bibr" rid="ref68">68</xref>].</p><boxed-text id="box1"><title> Algorithm: assign a document to the closest topic.</title><list list-type="order"><list-item><p>Input:</p><list list-type="bullet"><list-item><p>Raw text documents <inline-formula><mml:math id="ieqn1"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>X</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>,</p></list-item><list-item><p>BERTopic model parameters <italic>P</italic>.</p></list-item></list></list-item><list-item><p>Topic modeling:</p><list list-type="bullet"><list-item><p>Compute topics using the BERTopic model:</p><list list-type="simple"><list-item><p>Topics <inline-formula><mml:math id="ieqn2"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mo>{</mml:mo><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>}</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> = 
BERTopic(<italic>X,P</italic>)</p></list-item></list><p>where <italic>T</italic> is the number of topics and <italic>Y</italic><sub><italic>j</italic></sub> contains documents that are assigned to the topic <italic>j</italic>.</p></list-item></list></list-item><list-item><p>Sentence embeddings:</p><list list-type="bullet"><list-item><p>Transform documents <italic>X</italic> into vector representations using a sentence transformer such as BERT embedding.</p></list-item></list></list-item><list-item><p>Dimensionality reduction:</p><list list-type="bullet"><list-item><p>Apply Uniform Manifold Approximation and Projection for dimensionality reduction on the vector representations.</p></list-item></list></list-item><list-item><p>Cluster centers:</p><list list-type="bullet"><list-item><p>For each topic <italic>j</italic>, compute the center of the cluster:</p><p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:munder><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:munder><mml:mfrac><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>n</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mfrac></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula></p><p>where <italic>n</italic><sub><italic>j</italic></sub> is the number of documents in topic <italic>j</italic>, and <italic>x<sub>i</sub></italic> is the reduced vector representation of document <italic>d<sub>i</sub></italic>.</p></list-item></list></list-item><list-item><p>Topic assignment for unassigned documents:</p><list list-type="bullet"><list-item><p>For every document <italic>d<sub>i</sub></italic> that is not assigned to a 
topic by the BERTopic model, or for any new document:</p><list list-type="bullet"><list-item><p>Assign the document to the topic <italic>j</italic> that maximizes the cosine similarity between the document vector <italic>x<sub>i</sub></italic> and the cluster center <italic>t<sub>j</sub></italic> :</p><p><disp-formula id="E2"><label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">g</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mfrac><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x22C5;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo symmetric="true">&#x2016;</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo symmetric="true">&#x2016;</mml:mo></mml:mrow><mml:mrow><mml:mo symmetric="true">&#x2016;</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo symmetric="true">&#x2016;</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula></p></list-item></list></list-item></list></list-item></list></boxed-text><p>This approach not only ensures that every document in the dataset is assigned a topic but also enhances the interpretability and usability of the topic modeling results. 
By leveraging the semantic structure of the dataset, our algorithm effectively bridges the gap between unassigned documents and existing topic clusters, making it a robust solution for comprehensive topic coverage.</p></sec><sec id="s3-3"><title>Topic Description</title><p>As described in the &#x201C;Methods&#x201D; section, topic modeling algorithms typically produce word-level representations of topics. While useful for identifying key terms associated with a topic, these representations often lack sufficient contextual information, making interpretation challenging. To overcome this limitation, we developed a structured prompt framework (outlined in <xref ref-type="other" rid="box2">Textbox 2</xref>) and used the advanced capabilities of LLMs, specifically ChatGPT-4.0, to generate sentence-level representations for the identified topics. These sentence-level representations provide richer context and more interpretable descriptions, enabling a deeper understanding of the topics.</p><boxed-text id="box2"><title> The prompt structure and 1 example to find topics description.</title><p>Topic description prompt structure:</p><p><named-content content-type="indent">&#x2003;</named-content>System role:</p><p><named-content content-type="indent"><bold>&#x2003;</bold></named-content><named-content content-type="indent"><bold>&#x2003;</bold></named-content> Topic main words: [Top 10 words]</p><p><named-content content-type="indent"><bold>&#x2003;</bold></named-content><named-content content-type="indent"><bold>&#x2003;</bold></named-content>Topic document examples: [5 closest examples to the center of the topic]</p><p><named-content content-type="indent">&#x2003;</named-content>User role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> &#x201C;Describe topic in a short phrase?&#x201D;</p><p>Topic description prompt example:</p><p>System role:</p><p><named-content 
content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> Topic main words: [&#x201C;masks,&#x201D; &#x201C;mask,&#x201D; &#x201C;face,&#x201D; &#x201C;wearing,&#x201D; &#x201C;wear,&#x201D; &#x201C;use,&#x201D; &#x201C;oxygen,&#x201D; &#x201C;hypoxia,&#x201D; &#x201C;cause,&#x201D; and &#x201C;you&#x201D;].</p><list list-type="simple"><list-item><p>Topic document examples:</p><list list-type="order"><list-item><p>Centers for Disease Control and Prevention (CDC) does not recommend wearing masks.</p></list-item><list-item><p>The US CDC contradicted itself by advising people to wear cloth masks against the novel coronavirus while also saying masks do not stop smoke inhalation during a wildfire.</p></list-item><list-item><p>The World Health Organization changed its mind about masks and now says that they can increase the risk of infection.</p></list-item><list-item><p>Nonmedical masks are ineffective in preventing the spread of the disease, are circulating online.</p></list-item><list-item><p>Whether CDC had scheduled announcement that all should wear masks for everyday life.</p></list-item></list></list-item></list><p>User role: &#x201C;Describe topic in a short phrase?&#x201D;</p><p>Output answer: &#x201C;Controversies and debates over mask wearing and its effectiveness&#x201D;</p></boxed-text><p>The prompt includes the top 10 most representative words for each topic as identified by the topic modeling algorithm, and to add context and depth to the topic descriptions, we select 5 documents that are closest to the center of the corresponding topic cluster. The selection of these documents is guided by cosine similarity, performed using equations 1 and 2, which measure the proximity of documents to the cluster center in the semantic space. 
An example of this process is provided in <xref ref-type="other" rid="box2">Textbox 2</xref>, illustrating how the top words and representative documents are integrated into the prompt to produce a high-quality sentence-level representation.</p><p>By combining these elements, we construct detailed and context-rich prompts that guide ChatGPT-4.0 in generating coherent and semantically accurate sentence-level topic representations. This approach ensures that the abstract themes identified by topic modeling are translated into human-readable and interpretable descriptions.</p><p>To evaluate the quality of the generated topic descriptions, we engaged 3 independent raters to assess the descriptions based on 3 categories: appropriate, somewhat appropriate, and not appropriate. The evaluation results are shown in <xref ref-type="table" rid="table4">Table 4</xref> and highlight that the majority of topic descriptions were well received. Specifically, the total proportion of accepted descriptions (the sum of those rated as appropriate and somewhat appropriate) was 99.6%. This acceptance rate demonstrates the effectiveness and reliability of the proposed method for generating meaningful and contextually relevant topic descriptions. There was perfect agreement in 144 out of 169 (85.2%) of the sentences. Two out of 3 raters agreed on category in 24 out of 169 (14.2%) of the sentences. 
In a single instance (1 out of 169 sentences, 0.6%), none of the raters agreed.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Percentage of topic descriptions rated as &#x201C;appropriate,&#x201D; &#x201C;somewhat appropriate,&#x201D; and &#x201C;not appropriate&#x201D; by each rater, along with the total percentage of accepted topic descriptions, calculated as the sum of those rated &#x201C;appropriate&#x201D; and &#x201C;somewhat appropriate.&#x201D;</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Raters</td><td align="left" valign="bottom">Appropriate (%)</td><td align="left" valign="bottom">Somewhat appropriate (%)</td><td align="left" valign="bottom">Not appropriate (%)</td><td align="left" valign="bottom">Total accepted (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Rater 1</td><td align="left" valign="top">98.23</td><td align="left" valign="top">1.77</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top">Rater 2</td><td align="left" valign="top">94.67</td><td align="left" valign="top">5.33</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top">Rater 3</td><td align="left" valign="top">89.94</td><td align="left" valign="top">8.88</td><td align="left" valign="top">1.18</td><td align="left" valign="top">98.82</td></tr><tr><td align="left" valign="top">Average</td><td align="left" valign="top">94.28</td><td align="left" valign="top">5.32</td><td align="left" valign="top">0.39</td><td align="left" valign="top">99.6</td></tr></tbody></table></table-wrap><p>After generating concise descriptions for each topic, we used the structured prompt framework outlined in <xref ref-type="other" rid="box3">Textbox 3</xref>, which includes a list of these topic descriptions. 
This structured prompt was then input into the ChatGPT-4.0 API to further refine and categorize the topics into overarching themes.</p><boxed-text id="box3"><title> Structure of the prompt for identifying topic themes.</title><p>Finding topic themes prompt structure:</p><p><named-content content-type="indent">&#x2003;</named-content>System role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>The following are topics related to COVID-19 pandemic. Go through all topics and categorize them into relevant groups. Mention topics number for each category.</p><p><named-content content-type="indent">&#x2003;</named-content>User role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Topics description list</p></boxed-text><p>The output from this process not only provides a clear categorization of topics into distinct themes but also includes a concise description for each theme. This step ensures that the topics are grouped in a meaningful and interpretable way, facilitating a deeper understanding of the data&#x2019;s thematic structure.</p><p>The categorized topics and their corresponding theme descriptions are provided in <xref ref-type="fig" rid="figure4">Figures 4</xref> and <xref ref-type="fig" rid="figure5">5</xref>, showcasing the effectiveness of the proposed method in generating coherent and insightful thematic groupings.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Descriptions of themes 1-7 along with the corresponding topic descriptions assigned to each theme. 
CDC: Centers for Disease Control and Prevention.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e75500_fig04.png"/></fig><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Descriptions of themes 8-13 along with the corresponding topic descriptions assigned to each theme.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e75500_fig05.png"/></fig><p>Using the algorithm in <xref ref-type="other" rid="box1">Textbox 1</xref>, we assign each document to a topic, enabling us to determine the distribution of each theme. <xref ref-type="table" rid="table5">Table 5</xref> shows the distribution of all themes, with theme 4 (Vaccines) and theme 3 (Conspiracy Theories) emerging as the first and second most prevalent misinformation themes. Here are all the themes and their percentages:</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Distribution of COVID-19 misinformation themes.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Theme</td><td align="left" valign="top">Values (N=18,018), n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Theme 1: Home remedies</td><td align="char" char="." valign="top">1334 (7.40)</td></tr><tr><td align="left" valign="top">Theme 2: Deaths and statistics</td><td align="char" char="." valign="top">570 (3.16)</td></tr><tr><td align="left" valign="top">Theme 3: Conspiracy theories</td><td align="char" char="." valign="top">3459 (19.20)</td></tr><tr><td align="left" valign="top">Theme 4: Vaccine</td><td align="char" char="." valign="top">3595 (19.95)</td></tr><tr><td align="left" valign="top">Theme 5: Testing</td><td align="char" char="." valign="top">546 (3.03)</td></tr><tr><td align="left" valign="top">Theme 6: Ivermectin</td><td align="char" char="." 
valign="top">814 (4.52)</td></tr><tr><td align="left" valign="top">Theme 7: Government</td><td align="char" char="." valign="top">2094 (11.62)</td></tr><tr><td align="left" valign="top">Theme 8: Lockdowns</td><td align="char" char="." valign="top">1316 (7.30)</td></tr><tr><td align="left" valign="top">Theme 9: Transmission</td><td align="char" char="." valign="top">297 (1.65)</td></tr><tr><td align="left" valign="top">Theme 10: Defensive</td><td align="char" char="." valign="top">1269 (7.04)</td></tr><tr><td align="left" valign="top">Theme 11: International</td><td align="char" char="." valign="top">891 (4.95)</td></tr><tr><td align="left" valign="top">Theme 12: Media</td><td align="char" char="." valign="top">252 (1.40)</td></tr><tr><td align="left" valign="top">Theme 13: Minor topics</td><td align="char" char="." valign="top">1581 (8.77)</td></tr></tbody></table></table-wrap><p><xref ref-type="fig" rid="figure6">Figure 6</xref> shows the distribution of each topic within theme 8 over time, illustrating that protests against COVID-19 restrictions were the only misinformation topic that actively persisted even after the release of the COVID-19 vaccine.</p><p>After identifying the misinformation themes, we leverage the explanations provided in the IFCN dataset as a basis to draw refutation arguments to address these themes. For each identified theme, we develop a refutation that aligns with its context, aiming to clarify the nature of the misinformation, its potential origins, and its impact. These refutations are shown in <xref ref-type="other" rid="box4">Textbox 4</xref>. This list contains a refutation argument for each of the 13 themes, providing valuable insights into the underlying reasons for the misinformation.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Distribution of theme 8 topics over time. 
FDA: Food and Drug Administration; WHO: World Health Organization.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e75500_fig06.png"/></fig><boxed-text id="box4"><title> The refutation arguments list for 13 themes.</title><p>Refutation list:</p><list list-type="order"><list-item><p>This content may contain misinformation related to home remedies for COVID-19 prevention and treatment.</p></list-item><list-item><p>This content may contain misinformation related to COVID-19 deaths, statistics, and their relation to vaccination.</p></list-item><list-item><p>This content may contain conspiracy theories and misinformation related to COVID-19, including unverified claims, distorted facts, or manipulated content.</p></list-item><list-item><p>This content may contain misinformation related to COVID-19 vaccines, including false claims about safety, efficacy, and side effects.</p></list-item><list-item><p>This content may contain misinformation related to COVID-19 testing accuracy, including false claims about polymerase chain reaction tests and unscientific self-check methods.</p></list-item><list-item><p>This content may contain misinformation related to COVID-19 treatments, including exaggerated claims about ivermectin, hydroxychloroquine, or unproven supplements.</p></list-item><list-item><p>This content may contain misinformation related to government and political responses to COVID-19 pandemic, including distorted facts, fabricated claims, or misrepresentation of policies and actions.</p></list-item><list-item><p>This content may contain misinformation related to COVID-19 lockdowns and restrictions, including fabricated or misrepresented events, videos, or claims.</p></list-item><list-item><p>This content may contain misinformation related to COVID-19 transmission and survival in various environments, including unverified claims about foods, surfaces, or environmental 
factors.</p></list-item><list-item><p>This content may contain misinformation related to defensive and protective measures against COVID-19 pandemic, including false or exaggerated claims about masks, sanitizers, UV rays, or disinfectants.</p></list-item><list-item><p>This content may contain misinformation related to international incidents and responses to COVID-19 pandemic, including fabricated reports of government actions, health care capacity, or global cooperation.</p></list-item><list-item><p>This content may contain misinformation related to the spread of COVID-19 information on social media and messaging apps, including false claims about government policies, platforms, or media manipulation.</p></list-item><list-item><p>This content may contain miscellaneous misinformation related to COVID-19 pandemic, including distortions about products, religious practices, bioweapons, and other fabricated claims.</p></list-item></list></boxed-text></sec><sec id="s3-4"><title>Provide Refutation</title><p>In the final stage of our process, we design a prompt text to enable ChatGPT-4.0 to detect specific misinformation themes. The prompt text includes a detailed description of the themes and a question-answer list. To create this question-answer list, we select the document closest to the center of each topic and associate it with the corresponding theme. Detailed information about the prompt text can be found in <xref ref-type="other" rid="box5">Textbox 5</xref>.</p><boxed-text id="box5"><title> Prompt structure and 1 example to find a document theme.</title><p>Finding document themes prompt structure:</p><p><named-content content-type="indent">&#x2003;</named-content>System role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>&#x201C;The following is the description of topic themes related to COVID-19 misinformation. Find the closest theme for the given text. 
Answer in a consistent style.&#x201D;</p><p><named-content content-type="indent">&#x2003;</named-content>User role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Themes description list</p><p><named-content content-type="indent">&#x2003;</named-content>Assistant role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Question-answer list</p><p><named-content content-type="indent">&#x2003;</named-content>User role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Input text</p><p>Finding document themes prompt example:</p><p><named-content content-type="indent">&#x2003;</named-content>System role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>&#x201C;The following is the description of topic themes related to COVID-19 misinformation. Find the closest theme for the given text. 
Answer in a consistent style.&#x201D;</p><p><named-content content-type="indent">&#x2003;</named-content>User role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Themes description list</p><p><named-content content-type="indent">&#x2003;</named-content>Assistant role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Question-answer list</p><p><named-content content-type="indent">&#x2003;</named-content>User role:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>&#x201C;A video shows that Bill Gates admits the vaccine will no doubt kill 700,000 people.&#x201D;</p><p><named-content content-type="indent">&#x2003;</named-content>Output answer:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Theme 3: &#x201C;Conspiracy Theories and Misinformation&#x201D;</p></boxed-text><p>To evaluate our approach, we randomly selected 130 documents (10 per theme) from the IFCN dataset that are not included in the question-answer list. We then tested the prompting method with ChatGPT-4.0, achieving an 82% accuracy rate in detecting the correct themes. Moreover, as shown in <xref ref-type="fig" rid="figure7">Figure 7</xref>, the model struggled to detect themes 2, 7, and 13 in comparison with other themes.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Confusion matrix of theme detector.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e75500_fig07.png"/></fig></sec><sec id="s3-5"><title>Proposed System</title><p>Based on the process we introduced, we propose the development of an MDIS. 
This system is designed to first determine whether a given text document contains misinformation or not. If the document is identified as containing misinformation, the system then detects its theme and provides a detailed refutation of the misinformation. The primary objectives of MDIS are to prevent the spread of misinformation and to enhance public health knowledge.</p><p>MDIS operates by integrating 3 key components. First, it uses a trained LLM to classify text documents as either misinformation or nonmisinformation. Next, it uses another trained LLM to detect the specific misinformation theme within documents identified as containing misinformation. Finally, it leverages a refutation list, which is generated during the theme description phase, to provide context and counternarratives for each detected theme. This comprehensive approach enables the system to effectively address misinformation while equipping users with accurate information.</p><p><xref ref-type="fig" rid="figure1">Figure 1C</xref> illustrates an example of an input to MDIS and its corresponding output, demonstrating how the system analyzes a document, identifies misinformation, detects the associated theme, and presents an explanatory response.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>This study developed the MDIP, a structured workflow for analyzing health misinformation and generating explanatory counterarguments. Building on MDIP, we designed a prototype MDIS that combines detection, topic detection, theme identification, and refutation generation. While the system has not yet been deployed in real-world environments, the results illustrate its potential to support public health communication.</p><p>In the misinformation classification task, LLMs achieved reasonable performance, with BERT reaching 98% accuracy. 
Enriching training datasets also reduced false positives on AI-generated misinformation by 44% compared with prior baselines, suggesting improved robustness across different linguistic styles. Topic modeling experiments highlighted the advantages of BERTopic relative to LDA and Top2Vec, with higher coherence and diversity metrics. To address the issue of unassigned documents, the algorithm in <xref ref-type="other" rid="box1">Textbox 1</xref> was used to assign outliers to their nearest topic cluster, thereby improving coverage of the dataset. To enhance interpretability, word-level topic outputs were converted into sentence-level descriptions through prompt engineering with ChatGPT-4.0. These descriptions were judged appropriate or somewhat appropriate in 99.6% of cases by independent raters, indicating the value of sentence-level representations for clarity and interpretability. Building on these descriptions, topics were further grouped into broader themes and linked with theme-specific refutations.</p><p>Finally, a prompt-based detector for misinformation themes achieved 82% accuracy. These results demonstrate how detection, topic modeling, thematic grouping, and refutation can be integrated into a single workflow. The prototype system (MDIS) represents an illustration of concept, and while the findings are encouraging, validation is limited to English language, COVID-19&#x2013;related data, and offline testing. Broader generalization, multilingual adaptation, integration into health communication workflows, and longitudinal evaluation remain important directions for future work.</p></sec><sec id="s4-2"><title>Limitations</title><p>While this study presents a framework for detecting and addressing misinformation, several limitations should be acknowledged. 
First, the system&#x2019;s performance relies on the quality and diversity of the datasets that were used for training and topic modeling, which may not fully capture the linguistic and contextual nuances of misinformation in different regions, languages, or cultural contexts. This limitation introduces a potential bias: refutations that appear clear and persuasive in one sociocultural context may be ineffective&#x2014;or even counterproductive&#x2014;in another.</p><p>The model has been tested solely on English text&#x2013;based misinformation, and it has not been evaluated for multilingual and multimodal adaptation, meaning its ability to detect misinformation and provide persuasive refutation across different languages and sociocultural contexts remains uncertain. Moreover, while the theme detection module achieved an accuracy of 82%, this leaves an 18% error margin, especially in ambiguous or overlapping themes, which can lead to inaccuracies in refutation and reduce the overall effectiveness of the generated warning texts. Developing and integrating more sophisticated algorithms to address overlapping in topic themes could substantially enhance the accuracy of theme detection. Improved theme separation would not only yield clearer and more coherent thematic structures but also strengthen the generation of precise and contextually relevant refutations. In turn, this refinement would enable more effective countermeasures against health misinformation, thereby improving the system&#x2019;s overall capacity to support public health communication and trust. False positives and negatives remain a concern, particularly when misinformation contains opinion-based, satirical, or context-dependent elements. Although the generated refutations follow a systematic structure, they may not always be contextually relevant, persuasive, or ethically suitable for diverse audiences. 
In the absence of a human-in-the-loop or oversight mechanism, the system may produce counterarguments that fail to resonate with users or could be perceived as unreliable. In addition, the system has not yet been extensively tested in real-world applications, which limits understanding of its practical impact on misinformation spread and public health outcomes. Furthermore, misinformation evolves over time, and a model trained on past narratives may require periodic retraining to remain effective against emerging falsehoods, including AI-generated misinformation. Using pretrained LLMs such as ChatGPT depends on the current version of the model and its accessibility to users. Therefore, it is necessary to update the system regularly when the model is changed or becomes unavailable. Moreover, since passing datasets through third-party platforms may compromise the security of the framework, future work could focus on developing an in-house solution by training a dedicated model for our specific tasks, thereby eliminating the reliance on external platforms. Finally, the reliance on automated methods raises potential concerns about interpretability and transparency, which are crucial for fostering trust and adoption by end users.</p></sec><sec id="s4-3"><title>Comparison With Prior Work</title><p>The proposed MDIP and the resulting MDIS build upon and advance the body of research focused on misinformation detection and mitigation. The proposed method transforms raw posts into <italic>actionable units</italic>&#x2014;sentence-level topic labels, aggregated themes, and paired refutations&#x2014;linking detection outputs directly to message design and response playbooks used by health teams. Previous research has demonstrated the efficacy of ML models, particularly deep learning approaches, in detecting misinformation. 
These studies used ML techniques to classify fake news using textual features, demonstrating the value of automated detection methods [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref69">69</xref>]. Our study extends these efforts by integrating enriched datasets containing both formal and informal language styles, ensuring better generalization across diverse linguistic sources, including AI-generated misinformation.</p><p>Topic modeling techniques such as LDA have been used in prior studies to analyze misinformation [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. Our approach improves on these works by addressing limitations in document assignment and theme interpretation. We used an algorithm to assign every document to the most relevant topic, resolving the common issue of unclassified documents in topic modeling. In addition, we moved beyond word-level topic representations to generate sentence-level descriptions, offering richer and more interpretable insights. By tracking shifts in sentence-level topics and theme distributions, communicators can conduct pre-/postassessments of campaigns or platform policy changes, complementing survey-based outcomes. Finally, we designed an effective prompt text to automatically identify the themes of misinformation. This automated approach reduces reliance on manual interpretation, minimizing human bias and increasing scalability.</p><p>Many prior studies have addressed misinformation detection or topic analysis in isolation. These studies analyzed misinformation using sentiment analysis but did not integrate detection with thematic analysis and did not provide a framework for counteracting misinformation [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref70">70</xref>]. 
Our work unifies detection, topic modeling, thematic refutation, and public health intervention in a single framework. The MDIS framework automates the end-to-end process, offering a scalable solution to tackle the complexity of misinformation dynamics.</p></sec><sec id="s4-4"><title>Conclusions</title><p>This work contributes a methodological framework for infodemiology and digital health operations. We transform misinformation into actionable units&#x2014;themes and refutations&#x2014;so that health teams can act (communicate, triage, and evaluate). Moreover, analyzing misinformation using a hierarchical (2-level) sentence-level description and assigning all documents to topics makes it possible to observe theme and topic distributions over time, providing a broad and sensible overview of misinformation. Sentence-level topics and theme distributions serve as measurable indicators for surveillance and intervention evaluation (eg, pre-/postcampaign shifts and surge detection). We introduce MDIP and MDIS, which enable rapid response playbooks and reduce analyst workload. To support adoption, we release prompt templates and code as implementation artifacts that teams can readily adapt. Real-world deployment, however, requires governance mechanisms (human-in-the-loop review and audit logs), multilingual extensions, and prospective trials with health agencies or platforms to quantify downstream impact (eg, reduced spread and improved literacy). Ultimately, these contributions orient detection toward operational use&#x2014;prioritizing interpretability and intervention design&#x2014;so that public health actors can move from finding misinformation to effectively countering it.</p></sec></sec></body><back><notes><sec><title>Funding</title><p>This research is supported in part by a research grant from the Investigator-Initiated Studies Program of Merck Sharp &#x0026; Dohme Corp (MISP #102050). 
The opinions expressed in this paper are those of the authors and do not necessarily represent those of Merck Sharp &#x0026; Dohme Corp.</p></sec><sec><title>Data Availability</title><p>All implementation code can be accessed through the GitHub repository [<xref ref-type="bibr" rid="ref71">71</xref>].</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term id="abb3">CDC</term><def><p>Centers for Disease Control and Prevention</p></def></def-item><def-item><term id="abb4">CV</term><def><p>Coherence Value</p></def></def-item><def-item><term id="abb5">GPT-2</term><def><p>Generative Pre-trained Transformer 2</p></def></def-item><def-item><term id="abb6">HPV</term><def><p>human papillomavirus</p></def></def-item><def-item><term id="abb7">IRBO</term><def><p>Inverted Rank-Biased Overlap</p></def></def-item><def-item><term id="abb8">LDA</term><def><p>Latent Dirichlet Allocation</p></def></def-item><def-item><term id="abb9">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb10">MDIP</term><def><p>Misinformation Detection and Inoculation Process</p></def></def-item><def-item><term id="abb11">MDIS</term><def><p>Misinformation Detection and Inoculation System</p></def></def-item><def-item><term id="abb12">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb13">NPMI</term><def><p>Normalized Pointwise Mutual Information</p></def></def-item><def-item><term id="abb14">T5-base</term><def><p>Text-to-Text Transfer Transformer</p></def></def-item><def-item><term id="abb15">WHO</term><def><p>World Health Organization</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref 
id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kisa</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kisa</surname><given-names>A</given-names> </name></person-group><article-title>A comprehensive analysis of COVID-19 misinformation, public health impacts, and communication strategies: scoping review</article-title><source>J Med Internet Res</source><year>2024</year><month>08</month><day>21</day><volume>26</volume><fpage>e56931</fpage><pub-id pub-id-type="doi">10.2196/56931</pub-id><pub-id pub-id-type="medline">39167790</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Loomba</surname><given-names>S</given-names> </name><name name-style="western"><surname>de Figueiredo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Piatek</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>de Graaf</surname><given-names>K</given-names> </name><name name-style="western"><surname>Larson</surname><given-names>HJ</given-names> </name></person-group><article-title>Measuring the impact of COVID-19 vaccine misinformation on vaccination intent in the UK and USA</article-title><source>Nat Hum Behav</source><year>2021</year><month>03</month><volume>5</volume><issue>3</issue><fpage>337</fpage><lpage>348</lpage><pub-id pub-id-type="doi">10.1038/s41562-021-01056-1</pub-id><pub-id pub-id-type="medline">33547453</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moscadelli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Albora</surname><given-names>G</given-names> </name><name 
name-style="western"><surname>Biamonte</surname><given-names>MA</given-names> </name><etal/></person-group><article-title>Fake news and Covid-19 in Italy: results of a quantitative observational study</article-title><source>Int J Environ Res Public Health</source><year>2020</year><month>08</month><day>12</day><volume>17</volume><issue>16</issue><fpage>5850</fpage><pub-id pub-id-type="doi">10.3390/ijerph17165850</pub-id><pub-id pub-id-type="medline">32806772</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chou</surname><given-names>WYS</given-names> </name><name name-style="western"><surname>Oh</surname><given-names>A</given-names> </name><name name-style="western"><surname>Klein</surname><given-names>WMP</given-names> </name></person-group><article-title>Addressing health-related misinformation on social media</article-title><source>JAMA</source><year>2018</year><month>12</month><day>18</day><volume>320</volume><issue>23</issue><fpage>2417</fpage><lpage>2418</lpage><pub-id pub-id-type="doi">10.1001/jama.2018.16865</pub-id><pub-id pub-id-type="medline">30428002</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kata</surname><given-names>A</given-names> </name></person-group><article-title>Anti-vaccine activists, Web 2.0, and the postmodern paradigm &#x2013; an overview of tactics and tropes used online by the anti-vaccination movement</article-title><source>Vaccine (Auckl)</source><year>2012</year><month>05</month><volume>30</volume><issue>25</issue><fpage>3778</fpage><lpage>3789</lpage><pub-id pub-id-type="doi">10.1016/j.vaccine.2011.11.112</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Zimet</surname><given-names>GD</given-names> </name><name name-style="western"><surname>Rosberger</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Fisher</surname><given-names>WA</given-names> </name><name name-style="western"><surname>Perez</surname><given-names>S</given-names> </name><name name-style="western"><surname>Stupiansky</surname><given-names>NW</given-names> </name></person-group><article-title>Beliefs, behaviors and HPV vaccine: correcting the myths and the misinformation</article-title><source>Prev Med</source><year>2013</year><month>11</month><volume>57</volume><issue>5</issue><fpage>414</fpage><lpage>418</lpage><pub-id pub-id-type="doi">10.1016/j.ypmed.2013.05.013</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Poland</surname><given-names>GA</given-names> </name><name name-style="western"><surname>Jacobson</surname><given-names>RM</given-names> </name></person-group><article-title>Understanding those who do not understand: a brief review of the anti-vaccine movement</article-title><source>Vaccine (Auckl)</source><year>2001</year><month>03</month><volume>19</volume><issue>17-19</issue><fpage>2440</fpage><lpage>2445</lpage><pub-id pub-id-type="doi">10.1016/S0264-410X(00)00469-2</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kata</surname><given-names>A</given-names> </name></person-group><article-title>A postmodern Pandora&#x2019;s box: anti-vaccination misinformation on the internet</article-title><source>Vaccine (Auckl)</source><year>2010</year><month>02</month><day>17</day><volume>28</volume><issue>7</issue><fpage>1709</fpage><lpage>1716</lpage><pub-id pub-id-type="doi">10.1016/j.vaccine.2009.12.022</pub-id><pub-id 
pub-id-type="medline">20045099</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oyeyemi</surname><given-names>SO</given-names> </name><name name-style="western"><surname>Gabarron</surname><given-names>E</given-names> </name><name name-style="western"><surname>Wynn</surname><given-names>R</given-names> </name></person-group><article-title>Ebola, Twitter, and misinformation: a dangerous combination?</article-title><source>BMJ</source><year>2014</year><month>10</month><day>14</day><volume>349</volume><fpage>g6178</fpage><pub-id pub-id-type="doi">10.1136/bmj.g6178</pub-id><pub-id pub-id-type="medline">25315514</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Geoghegan</surname><given-names>S</given-names> </name><name name-style="western"><surname>O&#x2019;Callaghan</surname><given-names>KP</given-names> </name><name name-style="western"><surname>Offit</surname><given-names>PA</given-names> </name></person-group><article-title>Vaccine safety: myths and misinformation</article-title><source>Front Microbiol</source><year>2020</year><volume>11</volume><fpage>372</fpage><pub-id pub-id-type="doi">10.3389/fmicb.2020.00372</pub-id><pub-id pub-id-type="medline">32256465</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Zhou</surname><given-names>X</given-names> </name><name name-style="western"><surname>Coiera</surname><given-names>E</given-names> </name><name name-style="western"><surname>Tsafnat</surname><given-names>G</given-names> </name><name name-style="western"><surname>Arachi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ong</surname><given-names>MS</given-names> 
</name><name name-style="western"><surname>Dunn</surname><given-names>AG</given-names> </name></person-group><article-title>Using social connection information to improve opinion mining: identifying negative sentiment about HPV vaccines on twitter</article-title><source>Studies in Health Technology and Informatics</source><year>2015</year><fpage>761</fpage><lpage>765</lpage><pub-id pub-id-type="doi">10.3233/978-1-61499-564-7-761</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ghaddar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Khandaqji</surname><given-names>S</given-names> </name><name name-style="western"><surname>Awad</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Kansoun</surname><given-names>R</given-names> </name></person-group><article-title>Conspiracy beliefs and vaccination intent for COVID-19 in an infodemic</article-title><source>PLoS One</source><year>2022</year><volume>17</volume><issue>1</issue><fpage>e0261559</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0261559</pub-id><pub-id pub-id-type="medline">35020721</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Ghosh</surname><given-names>D</given-names> </name><name name-style="western"><surname>Scott</surname><given-names>B</given-names> </name></person-group><article-title>Disinformation is becoming unstoppable</article-title><source>TIME</source><year>2018</year><access-date>2025-11-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://time.com/5112847/facebook-fake-news-unstoppable/">https://time.com/5112847/facebook-fake-news-unstoppable/</ext-link></comment></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation 
citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Qazvinian</surname><given-names>V</given-names> </name><name name-style="western"><surname>Rosengren</surname><given-names>E</given-names> </name><name name-style="western"><surname>Radev</surname><given-names>D</given-names> </name><name name-style="western"><surname>Mei</surname><given-names>Q</given-names> </name></person-group><article-title>Rumor has it: identifying misinformation in microblogs</article-title><year>2011</year><access-date>2025-12-19</access-date><conf-name>Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>Jul 27-31, 2011</conf-date><conf-loc>Edinburgh, Scotland, UK</conf-loc><fpage>1589</fpage><lpage>1599</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/D11-1147.pdf">https://aclanthology.org/D11-1147.pdf</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>McKee</surname><given-names>M</given-names> </name><name name-style="western"><surname>Torbica</surname><given-names>A</given-names> </name><name name-style="western"><surname>Stuckler</surname><given-names>D</given-names> </name></person-group><article-title>Systematic literature review on the spread of health-related misinformation on social media</article-title><source>Soc Sci Med</source><year>2019</year><month>11</month><volume>240</volume><fpage>112552</fpage><pub-id pub-id-type="doi">10.1016/j.socscimed.2019.112552</pub-id><pub-id pub-id-type="medline">31561111</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Broniatowski</surname><given-names>DA</given-names> </name><name name-style="western"><surname>Jamison</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Qi</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Weaponized health communication: Twitter bots and Russian trolls amplify the vaccine debate</article-title><source>Am J Public Health</source><year>2018</year><month>10</month><volume>108</volume><issue>10</issue><fpage>1378</fpage><lpage>1384</lpage><pub-id pub-id-type="doi">10.2105/AJPH.2018.304567</pub-id><pub-id pub-id-type="medline">30138075</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zarocostas</surname><given-names>J</given-names> </name></person-group><article-title>How to fight an infodemic</article-title><source>Lancet</source><year>2020</year><month>02</month><day>29</day><volume>395</volume><issue>10225</issue><fpage>676</fpage><pub-id pub-id-type="doi">10.1016/S0140-6736(20)30461-X</pub-id><pub-id pub-id-type="medline">32113495</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Islam</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Kamal</surname><given-names>AHM</given-names> </name><name name-style="western"><surname>Kabir</surname><given-names>A</given-names> </name><etal/></person-group><article-title>COVID-19 vaccine rumors and conspiracy theories: the need for cognitive inoculation against misinformation to improve vaccine adherence</article-title><source>PLoS One</source><year>2021</year><volume>16</volume><issue>5</issue><fpage>e0251605</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0251605</pub-id></nlm-citation></ref><ref 
id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gallotti</surname><given-names>R</given-names> </name><name name-style="western"><surname>Valle</surname><given-names>F</given-names> </name><name name-style="western"><surname>Castaldo</surname><given-names>N</given-names> </name><name name-style="western"><surname>Sacco</surname><given-names>P</given-names> </name><name name-style="western"><surname>De Domenico</surname><given-names>M</given-names> </name></person-group><article-title>Assessing the risks of &#x201C;infodemics&#x201D; in response to COVID-19 epidemics</article-title><source>Nat Hum Behav</source><year>2020</year><month>12</month><volume>4</volume><issue>12</issue><fpage>1285</fpage><lpage>1293</lpage><pub-id pub-id-type="doi">10.1038/s41562-020-00994-6</pub-id><pub-id pub-id-type="medline">33122812</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cinelli</surname><given-names>M</given-names> </name><name name-style="western"><surname>Quattrociocchi</surname><given-names>W</given-names> </name><name name-style="western"><surname>Galeazzi</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The COVID-19 social media infodemic</article-title><source>Sci Rep</source><year>2020</year><volume>10</volume><issue>1</issue><pub-id pub-id-type="doi">10.1038/s41598-020-73510-5</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mian</surname><given-names>A</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>S</given-names> </name></person-group><article-title>Coronavirus: the spread of misinformation</article-title><source>BMC 
Med</source><year>2020</year><month>03</month><day>18</day><volume>18</volume><issue>1</issue><fpage>89</fpage><pub-id pub-id-type="doi">10.1186/s12916-020-01556-3</pub-id><pub-id pub-id-type="medline">32188445</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kumar</surname><given-names>N</given-names> </name><name name-style="western"><surname>Corpus</surname><given-names>I</given-names> </name><name name-style="western"><surname>Hans</surname><given-names>M</given-names> </name><etal/></person-group><article-title>COVID-19 vaccine perceptions in the initial phases of US vaccine roll-out: an observational study on Reddit</article-title><source>BMC Public Health</source><year>2022</year><month>03</month><day>7</day><volume>22</volume><issue>1</issue><fpage>446</fpage><pub-id pub-id-type="doi">10.1186/s12889-022-12824-7</pub-id><pub-id pub-id-type="medline">35255881</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>J</given-names> </name><name name-style="western"><surname>Dai</surname><given-names>Y</given-names> </name></person-group><article-title>Misinformation and the paradox of trust during the COVID-19 pandemic in the U.S.: pathways to risk perception and compliance behaviors</article-title><source>J Risk Res</source><year>2023</year><month>05</month><day>4</day><volume>26</volume><issue>5</issue><fpage>469</fpage><lpage>484</lpage><pub-id pub-id-type="doi">10.1080/13669877.2023.2176910</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Hou</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Du</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Cross-country comparison of public awareness, rumors, and behavioral responses to the COVID-19 epidemic: infodemiology study</article-title><source>J Med Internet Res</source><year>2020</year><volume>22</volume><issue>8</issue><fpage>e21143</fpage><pub-id pub-id-type="doi">10.2196/21143</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bavel</surname><given-names>JJV</given-names> </name><name name-style="western"><surname>Baicker</surname><given-names>K</given-names> </name><name name-style="western"><surname>Boggio</surname><given-names>PS</given-names> </name><etal/></person-group><article-title>Using social and behavioural science to support COVID-19 pandemic response</article-title><source>Nat Hum Behav</source><year>2020</year><volume>4</volume><issue>5</issue><fpage>460</fpage><lpage>471</lpage><pub-id pub-id-type="doi">10.1038/s41562-020-0884-z</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schiffman</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Bauer</surname><given-names>HM</given-names> </name><name name-style="western"><surname>Hoover</surname><given-names>RN</given-names> </name><etal/></person-group><article-title>Epidemiologic evidence showing that human papillomavirus infection causes most cervical intraepithelial neoplasia</article-title><source>J Natl Cancer 
Inst</source><year>1993</year><month>06</month><day>16</day><volume>85</volume><issue>12</issue><fpage>958</fpage><lpage>964</lpage><pub-id pub-id-type="doi">10.1093/jnci/85.12.958</pub-id><pub-id pub-id-type="medline">8388478</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bosch</surname><given-names>FX</given-names> </name><name name-style="western"><surname>Manos</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Munoz</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Prevalence of human papillomavirus in cervical cancer: a worldwide perspective</article-title><source>JNCI Journal of the National Cancer Institute</source><year>1995</year><month>06</month><day>7</day><volume>87</volume><issue>11</issue><fpage>796</fpage><lpage>802</lpage><pub-id pub-id-type="doi">10.1093/jnci/87.11.796</pub-id><pub-id pub-id-type="medline">7791229</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Jemal</surname><given-names>A</given-names> </name></person-group><article-title>Cancer statistics, 2019</article-title><source>CA Cancer J Clin</source><year>2019</year><month>01</month><volume>69</volume><issue>1</issue><fpage>7</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.3322/caac.21551</pub-id><pub-id pub-id-type="medline">30620402</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>Reasons to get vaccinated</article-title><source>Centers for Disease Control and 
Prevention</source><year>2021</year><access-date>2025-11-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/hpv">https://www.cdc.gov/hpv</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pingali</surname><given-names>C</given-names> </name><name name-style="western"><surname>Yankey</surname><given-names>D</given-names> </name><name name-style="western"><surname>Elam-Evans</surname><given-names>LD</given-names> </name><etal/></person-group><article-title>National, regional, state, and selected local area vaccination coverage among adolescents aged 13-17 Years&#x2014;United States, 2020</article-title><source>MMWR Morb Mortal Wkly Rep</source><year>2021</year><month>09</month><day>3</day><volume>70</volume><issue>35</issue><fpage>1183</fpage><lpage>1190</lpage><pub-id pub-id-type="doi">10.15585/mmwr.mm7035a1</pub-id><pub-id pub-id-type="medline">34473682</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moorhead</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Hazlett</surname><given-names>DE</given-names> </name><name name-style="western"><surname>Harrison</surname><given-names>L</given-names> </name><name name-style="western"><surname>Carroll</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Irwin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hoving</surname><given-names>C</given-names> </name></person-group><article-title>A new dimension of health care: systematic review of the uses, benefits, and limitations of social media for health communication</article-title><source>J Med Internet 
Res</source><year>2013</year><month>04</month><day>23</day><volume>15</volume><issue>4</issue><fpage>e85</fpage><pub-id pub-id-type="doi">10.2196/jmir.1933</pub-id><pub-id pub-id-type="medline">23615206</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Levin</surname><given-names>S</given-names> </name></person-group><article-title>Facebook promised to tackle fake news but the evidence shows it&#x2019;s not working</article-title><source>The Guardian</source><year>2017</year><access-date>2025-12-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.theguardian.com/technology/2017/may/16/facebook-fake-news-tools-not-working">https://www.theguardian.com/technology/2017/may/16/facebook-fake-news-tools-not-working</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Parker</surname><given-names>AG</given-names> </name><name name-style="western"><surname>De Choudhury</surname><given-names>M</given-names> </name></person-group><article-title>Synthetic lies: understanding AI-generated misinformation and evaluating algorithmic and human solutions</article-title><source>CHI &#x2019;23: Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems</source><year>2023</year><fpage>1</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.1145/3544548.3581318</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name 
name-style="western"><surname>Jiang</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Nirmal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>H</given-names> </name></person-group><article-title>Disinformation detection: an evolving challenge in the age of LLMs</article-title><source>Proceedings of the 2024 SIAM International Conference on Data Mining (SDM)</source><year>2024</year><publisher-name>Society for Industrial and Applied Mathematics Publications</publisher-name><fpage>427</fpage><lpage>435</lpage><pub-id pub-id-type="doi">10.1137/1.9781611978032.50</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Du</surname><given-names>J</given-names> </name><name name-style="western"><surname>Preston</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Using machine learning-based approaches for the detection and classification of human papillomavirus vaccine misinformation: infodemiology study of Reddit discussions</article-title><source>J Med Internet Res</source><year>2021</year><month>08</month><day>5</day><volume>23</volume><issue>8</issue><fpage>e26478</fpage><pub-id pub-id-type="doi">10.2196/26478</pub-id><pub-id pub-id-type="medline">34383667</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tomaszewski</surname><given-names>T</given-names> </name><name name-style="western"><surname>Morales</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lourentzou</surname><given-names>I</given-names> 
</name><etal/></person-group><article-title>Identifying false human papillomavirus (HPV) vaccine information and corresponding risk perceptions from Twitter: advanced predictive models</article-title><source>J Med Internet Res</source><year>2021</year><month>09</month><day>9</day><volume>23</volume><issue>9</issue><fpage>e30451</fpage><pub-id pub-id-type="doi">10.2196/30451</pub-id><pub-id pub-id-type="medline">34499043</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Farajijalal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Malek</surname><given-names>S</given-names> </name><name name-style="western"><surname>Toudeshki</surname><given-names>A</given-names> </name><name name-style="western"><surname>Viers</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Ehsani</surname><given-names>R</given-names> </name></person-group><article-title>Data-driven model to improve mechanical harvesters for nut trees</article-title><year>2024</year><conf-name>2024 ASABE Annual International Meeting</conf-name><conf-date>Jul 28-31, 2024</conf-date><conf-loc>California</conf-loc><fpage>1</fpage><pub-id pub-id-type="doi">10.13031/aim.202400858</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Malek</surname><given-names>S</given-names> </name><name name-style="western"><surname>Salehkaleybar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Amini</surname><given-names>A</given-names> </name></person-group><article-title>Multi variable-layer neural networks for decoding linear codes</article-title><year>2020</year><conf-name>2020 8th Iran Workshop on Communication and Information Theory (IWCIT)</conf-name><conf-date>May 26-28, 
2020</conf-date><conf-loc>Tehran, Iran</conf-loc><fpage>1</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1109/IWCIT50667.2020.9163473</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Chui</surname><given-names>M</given-names> </name><name name-style="western"><surname>Manyika</surname><given-names>J</given-names> </name><name name-style="western"><surname>Miremadi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Henke</surname><given-names>N</given-names> </name><name name-style="western"><surname>Chung</surname><given-names>R</given-names> </name><name name-style="western"><surname>Nel</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Notes from the AI frontier: insights from hundreds of use cases</article-title><year>2018</year><publisher-name>McKinsey Global Institute</publisher-name><fpage>1</fpage><lpage>31</lpage></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Acemoglu</surname><given-names>D</given-names> </name><name name-style="western"><surname>Restrepo</surname><given-names>P</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Agrawal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gans</surname><given-names>J</given-names> </name><name name-style="western"><surname>Goldfarb</surname><given-names>A</given-names> </name></person-group><article-title>Artificial intelligence, automation, and work</article-title><source>The Economics of Artificial Intelligence: An Agenda</source><year>2018</year><publisher-name>University of Chicago Press</publisher-name><fpage>197</fpage><lpage>236</lpage><pub-id 
pub-id-type="other">978-0-226-61333-8</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Micallef</surname><given-names>N</given-names> </name><name name-style="western"><surname>He</surname><given-names>B</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ahamad</surname><given-names>M</given-names> </name><name name-style="western"><surname>Memon</surname><given-names>N</given-names> </name></person-group><article-title>The role of the crowd in countering misinformation: a case study of the COVID-19 infodemic</article-title><year>2020</year><conf-name>2020 IEEE International Conference on Big Data (Big Data)</conf-name><conf-date>Dec 10-13, 2020</conf-date><conf-loc>Atlanta, GA, USA</conf-loc><fpage>748</fpage><lpage>757</lpage><pub-id pub-id-type="doi">10.1109/BigData50022.2020.9377956</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>B</given-names> </name><name name-style="western"><surname>Ahamad</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>S</given-names> </name></person-group><article-title>Reinforcement learning-based counter-misinformation response generation: a case study of COVID-19 vaccine misinformation</article-title><source>WWW &#x2019;23: Proceedings of the ACM Web Conference 2023</source><year>2023</year><fpage>2698</fpage><lpage>2709</lpage><pub-id pub-id-type="doi">10.1145/3543507.3583388</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Jia</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Liang</surname><given-names>P</given-names> </name></person-group><article-title>Adversarial examples for evaluating reading comprehension systems</article-title><source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source><year>2017</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>2021</fpage><lpage>2031</lpage><pub-id pub-id-type="doi">10.18653/v1/D17-1215</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Ribeiro</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><article-title>Semantically equivalent adversarial rules for debugging NLP models</article-title><source>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1)</source><year>2018</year><publisher-name>Association for Computational Linguistics</publisher-name><pub-id pub-id-type="doi">10.18653/v1/P18-1079</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Pham</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hoyle</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>S</given-names> </name><name name-style="western"><surname>Resnik</surname><given-names>P</given-names> </name><name name-style="western"><surname>Iyyer</surname><given-names>M</given-names> </name></person-group><article-title>TopicGPT: a prompt-based topic modeling framework</article-title><source>Proceedings of the 2024 Conference of the North American Chapter of the Association 
for Computational Linguistics</source><year>2024</year><publisher-name>Association for Computational Linguistics</publisher-name><pub-id pub-id-type="doi">10.18653/v1/2024.naacl-long.164</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Mu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Dong</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bontcheva</surname><given-names>K</given-names> </name><name name-style="western"><surname>Song</surname><given-names>X</given-names> </name></person-group><article-title>Large language models offer an alternative to the traditional approach of topic modelling</article-title><source>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</source><year>2024</year><access-date>2025-12-22</access-date><publisher-name>ELRA and ICCL</publisher-name><fpage>10160</fpage><lpage>10171</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2024.lrec-main.887/">https://aclanthology.org/2024.lrec-main.887/</ext-link></comment></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Doi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Isonuma</surname><given-names>M</given-names> </name><name name-style="western"><surname>Yanaka</surname><given-names>H</given-names> </name></person-group><article-title>Topic modeling for short texts with large language models</article-title><source>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4)</source><year>2024</year><publisher-name>Association for Computational 
Linguistics</publisher-name><fpage>21</fpage><lpage>33</lpage><pub-id pub-id-type="doi">10.18653/v1/2024.acl-srw.3</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Banas</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Rains</surname><given-names>SA</given-names> </name></person-group><article-title>A meta-analysis of research on inoculation theory</article-title><source>Commun Monogr</source><year>2010</year><month>09</month><volume>77</volume><issue>3</issue><fpage>281</fpage><lpage>311</lpage><pub-id pub-id-type="doi">10.1080/03637751003758193</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Patwa</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sharma</surname><given-names>S</given-names> </name><name name-style="western"><surname>Pykl</surname><given-names>S</given-names> </name><name name-style="western"><surname>Guptha</surname><given-names>V</given-names> </name><name name-style="western"><surname>Kumari</surname><given-names>G</given-names> </name><name name-style="western"><surname>Akhtar</surname><given-names>M</given-names> </name></person-group><article-title>Fighting an infodemic: COVID-19 fake news dataset</article-title><year>2021</year><conf-name>Combating Online Hostile Posts in Regional Languages during Emergency Situation</conf-name><publisher-name>Springer</publisher-name><pub-id pub-id-type="doi">10.1007/978-3-030-73696-5_3</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Saenz</surname><given-names>J</given-names> </name><name name-style="western"><surname>Gopal</surname><given-names>SRK</given-names> 
</name><name name-style="western"><surname>Shukla</surname><given-names>D</given-names> </name></person-group><article-title>COVID-19 fake news infodemic research dataset (COVID19-FNIR dataset)</article-title><source>IEEE Dataport</source><access-date>2025-12-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://dx.doi.org/10.21227/b5bt-5244">https://dx.doi.org/10.21227/b5bt-5244</ext-link></comment></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rai</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kornides</surname><given-names>M</given-names> </name><name name-style="western"><surname>Morgan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cappella</surname><given-names>J</given-names> </name><name name-style="western"><surname>Guntuku</surname><given-names>SC</given-names> </name></person-group><article-title>Detecting and monitoring concerns against HPV vaccination on social media using large language models</article-title><source>Sci Rep</source><year>2024</year><month>06</month><day>21</day><volume>14</volume><issue>1</issue><fpage>14362</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-64703-3</pub-id><pub-id pub-id-type="medline">38906941</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Glazkova</surname><given-names>A</given-names> </name><name name-style="western"><surname>Glazkov</surname><given-names>M</given-names> </name><name name-style="western"><surname>Trifonov</surname><given-names>T</given-names> </name></person-group><article-title>g2tmn at Constraint@AAAI2021: exploiting CT-BERT and ensembling learning for COVID-19 fake news 
detection</article-title><source>International Workshop on Combating Online Hostile Posts in Regional Languages during Emergency Situation</source><year>2021</year><publisher-name>Springer</publisher-name><fpage>116</fpage><lpage>127</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-73696-5_12</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Song</surname><given-names>X</given-names> </name><name name-style="western"><surname>Petrak</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>I</given-names> </name><name name-style="western"><surname>Maynard</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bontcheva</surname><given-names>K</given-names> </name></person-group><article-title>Classification aware neural topic model for COVID-19 disinformation categorisation</article-title><source>PLoS One</source><year>2021</year><volume>16</volume><issue>2</issue><fpage>e0247086</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0247086</pub-id><pub-id pub-id-type="medline">33600477</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ball</surname><given-names>P</given-names> </name><name name-style="western"><surname>Maxmen</surname><given-names>A</given-names> </name></person-group><article-title>The epic battle against coronavirus misinformation and conspiracy theories</article-title><source>Nature</source><year>2020</year><month>05</month><day>28</day><volume>581</volume><issue>7809</issue><fpage>371</fpage><lpage>374</lpage><pub-id pub-id-type="doi">10.1038/d41586-020-01452-z</pub-id></nlm-citation></ref><ref 
id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blei</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>AY</given-names> </name><name name-style="western"><surname>Jordan</surname><given-names>MI</given-names> </name></person-group><article-title>Latent Dirichlet Allocation</article-title><source>J Mach Learn Res</source><year>2003</year><access-date>2025-12-24</access-date><volume>3</volume><issue>Jan</issue><fpage>993</fpage><lpage>1022</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf">https://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf</ext-link></comment></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karas</surname><given-names>B</given-names> </name><name name-style="western"><surname>Qu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Q</given-names> </name></person-group><article-title>Experiments with LDA and Top2Vec for embedded topic discovery on social media data-A case study of cystic fibrosis</article-title><source>Front Artif Intell</source><year>2022</year><volume>5</volume><fpage>948313</fpage><pub-id pub-id-type="doi">10.3389/frai.2022.948313</pub-id><pub-id pub-id-type="medline">36062265</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Egger</surname><given-names>R</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>J</given-names> </name></person-group><article-title>A topic modeling comparison between LDA, NMF, 
Top2Vec, and BERTopic to demystify Twitter posts</article-title><source>Front Sociol</source><year>2022</year><volume>7</volume><fpage>886498</fpage><pub-id pub-id-type="doi">10.3389/fsoc.2022.886498</pub-id><pub-id pub-id-type="medline">35602001</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abdelrazek</surname><given-names>A</given-names> </name><name name-style="western"><surname>Eid</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Gawish</surname><given-names>E</given-names> </name><name name-style="western"><surname>Medhat</surname><given-names>W</given-names> </name><name name-style="western"><surname>Hassan</surname><given-names>A</given-names> </name></person-group><article-title>Topic modeling algorithms and applications: a survey</article-title><source>Inf Syst</source><year>2023</year><month>02</month><volume>112</volume><fpage>102131</fpage><pub-id pub-id-type="doi">10.1016/j.is.2022.102131</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>R&#x00F6;der</surname><given-names>M</given-names> </name><name name-style="western"><surname>Both</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hinneburg</surname><given-names>A</given-names> </name></person-group><article-title>Exploring the space of topic coherence measures</article-title><conf-name>WSDM &#x2019;15: Proceedings of the Eighth ACM International Conference on Web Search and Data Mining</conf-name><conf-date>Feb 2-6, 2015</conf-date><conf-loc>Shanghai, China</conf-loc><fpage>399</fpage><lpage>408</lpage><pub-id pub-id-type="doi">10.1145/2684822.2685324</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="book"><person-group 
person-group-type="author"><name name-style="western"><surname>Bouma</surname><given-names>G</given-names> </name></person-group><article-title>Normalized (pointwise) mutual information in collocation extraction</article-title><source>From Form to Meaning: Processing Texts Automatically (Proceedings of the Biennial GSCL Conference 2009)</source><year>2009</year><access-date>2025-12-22</access-date><publisher-name>Gunter Narr Verlag</publisher-name><fpage>31</fpage><lpage>40</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://books.google.com/books?hl=en&#x0026;lr=&#x0026;id=Ksv5DwAAQBAJ&#x0026;oi=fnd&#x0026;pg=PA31&#x0026;dq=Normalized+(pointwise)+mutual+information+in+collocation+extraction&#x0026;ots=Tj_ZJWS3q6&#x0026;sig=GOCo_lpkdER213-G_qlIx5EHfTs#v=onepage&#x0026;q=Normalized%20(pointwise)%20mutual%20information%20in%20collocation%20extraction&#x0026;f=false">https://books.google.com/books?hl=en&#x0026;lr=&#x0026;id=Ksv5DwAAQBAJ&#x0026;oi=fnd&#x0026;pg=PA31&#x0026;dq=Normalized+(pointwise)+mutual+information+in+collocation+extraction&#x0026;ots=Tj_ZJWS3q6&#x0026;sig=GOCo_lpkdER213-G_qlIx5EHfTs#v=onepage&#x0026;q=Normalized%20(pointwise)%20mutual%20information%20in%20collocation%20extraction&#x0026;f=false</ext-link></comment></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Brown</surname><given-names>T</given-names> </name><name name-style="western"><surname>Mann</surname><given-names>B</given-names> </name><name name-style="western"><surname>Ryder</surname><given-names>N</given-names> </name><name name-style="western"><surname>Subbiah</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kaplan</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Dhariwal</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Language models are few-shot 
learners</article-title><source>arXiv</source><comment>Preprint posted online on Jul 22, 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.2005.14165</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="web"><article-title>International Fact-Checking Network (IFCN). COVID-19 Fact-Checking Database</article-title><source>Poynter Institute</source><access-date>2025-12-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.poynter.org/ifcn-covid-19-misinformation/">https://www.poynter.org/ifcn-covid-19-misinformation/</ext-link></comment></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="web"><article-title>45 CFR part 46 &#x2013; protection of human subjects (common rule)</article-title><source>US Department of Health &#x0026; Human Services</source><access-date>2025-12-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ecfr.gov/current/title-45/subtitle-A/subchapter-A/part-46">https://www.ecfr.gov/current/title-45/subtitle-A/subchapter-A/part-46</ext-link></comment></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><year>2019</year><access-date>2025-12-02</access-date><conf-name>Proceedings of NAACL-HLT</conf-name><conf-date>Jun 2-7, 2019</conf-date><conf-loc>Minneapolis, MN</conf-loc><fpage>2</fpage><comment><ext-link ext-link-type="uri" xlink:href="https://au1206.github.io/assets/pdfs/BERT.pdf">https://au1206.github.io/assets/pdfs/BERT.pdf</ext-link></comment></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation 
citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Radford</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Child</surname><given-names>R</given-names> </name><name name-style="western"><surname>Luan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Amodei</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sutskever</surname><given-names>I</given-names> </name></person-group><article-title>Language models are unsupervised multitask learners</article-title><year>2019</year><access-date>2025-12-19</access-date><publisher-name>OpenAI blog</publisher-name><fpage>9</fpage><comment><ext-link ext-link-type="uri" xlink:href="https://storage.prod.researchhub.com/uploads/papers/2020/06/01/language-models.pdf">https://storage.prod.researchhub.com/uploads/papers/2020/06/01/language-models.pdf</ext-link></comment></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raffel</surname><given-names>C</given-names> </name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names> </name><name name-style="western"><surname>Roberts</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Narang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Matena</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Exploring the limits of transfer learning with a unified text-to-text transformer</article-title><source>J Mach Learn 
Res</source><year>2020</year><access-date>2025-12-19</access-date><volume>21</volume><issue>140</issue><fpage>1</fpage><lpage>67</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.jmlr.org/papers/volume21/20-074/20-074.pdf">https://www.jmlr.org/papers/volume21/20-074/20-074.pdf</ext-link></comment></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rainio</surname><given-names>O</given-names> </name><name name-style="western"><surname>Teuho</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kl&#x00E9;n</surname><given-names>R</given-names> </name></person-group><article-title>Evaluation metrics and statistical tests for machine learning</article-title><source>Sci Rep</source><year>2024</year><month>03</month><day>13</day><volume>14</volume><issue>1</issue><fpage>6086</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-56706-x</pub-id><pub-id pub-id-type="medline">38480847</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="web"><article-title>Outlier reduction</article-title><source>BERTopic</source><year>2025</year><month>09</month><day>18</day><access-date>2025-11-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://maartengr.github.io/BERTopic/getting_started/outlier_reduction/outlier_reduction.html">https://maartengr.github.io/BERTopic/getting_started/outlier_reduction/outlier_reduction.html</ext-link></comment></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ding</surname><given-names>X</given-names> </name><name name-style="western"><surname>Teng</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ji</surname><given-names>D</given-names> </name></person-group><article-title>Fake news 
detection with context awareness of the publisher</article-title><year>2023</year><month>07</month><day>1</day><conf-name>The 35th International Conference on Software Engineering and Knowledge Engineering</conf-name><conf-date>Jul 1-10, 2023</conf-date><pub-id pub-id-type="doi">10.18293/SEKE2023-061</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Piedrahita-Vald&#x00E9;s</surname><given-names>H</given-names> </name><name name-style="western"><surname>Piedrahita-Castillo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bermejo-Higuera</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Vaccine hesitancy on social media: sentiment analysis from June 2011 to April 2019</article-title><source>Vaccines (Basel)</source><year>2021</year><month>01</month><volume>9</volume><issue>1</issue><fpage>28</fpage><pub-id pub-id-type="doi">10.3390/vaccines9010028</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="web"><article-title>MDIP: misinformation detection and inoculation processing</article-title><source>GitHub, Inc</source><access-date>2025-11-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/SamiraMalek/MDIP-MDIS">https://github.com/SamiraMalek/MDIP-MDIS</ext-link></comment></nlm-citation></ref></ref-list></back></article>