<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v28i1e94855</article-id>
      <article-id pub-id-type="pmid">42077206</article-id>
      <article-id pub-id-type="doi">10.2196/94855</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Self-Reported Health Outcomes in Metabolic Health YouTube Comments: Cross-Sectional Study and Rule-Based Natural Language Processing Framework Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Brini</surname>
            <given-names>Stefano</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Cao</surname>
            <given-names>Yuchen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wreyford</surname>
            <given-names>Leon</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Ribeiro</surname>
            <given-names>Ricardo</given-names>
          </name>
          <degrees>MBA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Mechanical and Industrial Engineering</institution>
            <institution>NOVA School of Science and Technology</institution>
            <institution>Universidade Nova de Lisboa</institution>
            <addr-line>Quinta da Torre</addr-line>
            <addr-line>Caparica, Lisbon, 2829-516</addr-line>
            <country>Portugal</country>
            <phone>351 212948300</phone>
            <fax>351 212954461</fax>
            <email>rasi.ribeiro@campus.fct.unl.pt</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-5182-0017</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zutshi</surname>
            <given-names>Aneesh</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2211-2957</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Mechanical and Industrial Engineering</institution>
        <institution>NOVA School of Science and Technology</institution>
        <institution>Universidade Nova de Lisboa</institution>
        <addr-line>Caparica, Lisbon</addr-line>
        <country>Portugal</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Ricardo Ribeiro <email>rasi.ribeiro@campus.fct.unl.pt</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>26</day>
        <month>5</month>
        <year>2026</year>
      </pub-date>
      <volume>28</volume>
      <elocation-id>e94855</elocation-id>
      <history>
        <date date-type="received">
          <day>7</day>
          <month>3</month>
          <year>2026</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>3</month>
          <year>2026</year>
        </date>
        <date date-type="rev-recd">
          <day>24</day>
          <month>4</month>
          <year>2026</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>5</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Ricardo Ribeiro, Aneesh Zutshi. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 26.05.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2026/1/e94855" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>YouTube is increasingly used for healthcasting, the sharing of evidence-based dietary and lifestyle interventions by domain experts. In the metabolic health domain, channels focused on therapeutic carbohydrate restriction have accumulated audiences of millions. A distinctive feature is the comment section, where viewers share first-person accounts of health changes, constituting a unique source of real-world outcome data at scale. However, extracting structured health information from unstructured comments presents computational challenges.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This observational, cross-sectional study aims to develop and validate a precision-optimized computational framework for extracting self-reported health outcomes from healthcasting YouTube comments and to characterize the prevalence, distribution across health aspects, and channel-level variation of reported outcomes across a large-scale metabolic health corpus.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study analyzed 43,111 unique YouTube comments from 110 videos across 11 therapeutic carbohydrate restriction-focused healthcasting channels (37,458 unique authors; data span November 2013 to January 2026; collected via YouTube data application programming interface version 3). The methodology comprised 3 construction phases and 5 validation studies. The construction phases were (1) exploratory corpus characterization, (2) iterative development of a 35-aspect hierarchical health outcome ontology, and (3) precision-optimized rule-based classification, validated through precision validation (stratified sample of n=500), recall estimation (n=510), external validation on 5 held-out channels (n=12,653 comments), large language model–assisted interrater reliability assessment, and transformer baseline comparison against Bidirectional Encoder Representations from Transformers (BERT) and Robustly Optimized BERT Pretraining Approach (RoBERTa) classifiers. A supplementary aspect-based sentiment analysis contextualized the positive-only design.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The framework identified 1790 positive health outcome reports (1790/43,111, 4.15% prevalence), achieving 97.6% (488/500) precision (95% CI 95.7%-98.6%) and estimated 56.2% recall (95% CI 43.4%-67.9%). The reports described 6674 positive outcomes, distributed across 35 health aspects and 18 named disease conditions extending beyond weight loss: pain and inflammation reduction (1137/6674, 17%), type 2 diabetes improvement (977/6674, 14.6%), skin health (784/6674, 11.8%), and psychological well-being (731/6674, 11%). Over half (3355/6674, 50.3%) spanned multiple research objectives. Significant channel-level variation was observed (χ²<sub>10</sub>=927.5; <italic>P</italic>&#60;.001), with positive outcome rates ranging from 1.32% to 10.40% (odds ratio 8.68, 95% CI 7.10-10.61). Transformer baselines achieved higher recall but lower precision, confirming the rule-based framework’s advantage for high-confidence corpus generation. A supplementary aspect-based sentiment analysis indicated a positive-to-negative ratio of approximately 4.6:1 (n=1003), with negative experiences (59/495, 11.9%) predominantly involving gastrointestinal and cardiovascular concerns.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study presents, to our knowledge, the first validated, rule-based framework for extracting self-reported metabolic health outcomes from healthcasting YouTube comments at corpus scale. Unlike existing recall-oriented social media health classifiers, the precision-optimized design achieves the confidence threshold required for outcomes research without manual review. These findings demonstrate that expert-led health content comment sections constitute a scalable, complementary data source for monitoring real-world engagement with dietary interventions, with implications for public health surveillance, platform design, and health communication research.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>healthcasting</kwd>
        <kwd>therapeutic carbohydrate restriction</kwd>
        <kwd>user-generated content</kwd>
        <kwd>health outcomes</kwd>
        <kwd>natural language processing</kwd>
        <kwd>ontology engineering</kwd>
        <kwd>precision-optimized extraction</kwd>
        <kwd>YouTube</kwd>
        <kwd>self-reported outcomes</kwd>
        <kwd>metabolic health</kwd>
        <kwd>digital health</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background and Motivation</title>
        <p>Ongoing advances in metabolic health research have identified insulin resistance and excessive glycemic variability as principal contributors to chronic systemic inflammation and metabolic stress [<xref ref-type="bibr" rid="ref1">1</xref>]. Therapeutic carbohydrate restriction (TCR), encompassing ketogenic, low-carbohydrate, carnivore, and intermittent fasting approaches, reduces dietary carbohydrate to shift metabolic fuel use toward fatty acid oxidation and ketone body production [<xref ref-type="bibr" rid="ref2">2</xref>]. TCR-based interventions have demonstrated clinically significant improvements in glycemic control, body composition, and cardiometabolic risk markers across multiple randomized controlled trials and systematic reviews [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref8">8</xref>], with a recent meta-analysis of 30 randomized controlled trials (3806 adults) reporting significant reductions in metabolic syndrome indicators [<xref ref-type="bibr" rid="ref9">9</xref>]. Research has extended into neurological applications [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>] and metabolic psychiatry, where pilot clinical data suggest that ketogenic interventions may improve psychiatric symptom severity in bipolar disorder and schizophrenia [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref16">16</xref>].</p>
        <p>Despite this growing evidence base, several structural challenges constrain the conduct of large-scale clinical trials on dietary interventions. Dietary trials are inherently difficult to blind, compliance monitoring is resource-intensive, and long-term adherence remains a persistent methodological challenge [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Critically, because TCR interventions involve dietary and lifestyle modification rather than pharmaceutical compounds, there is no direct commercial entity positioned to sponsor large-scale efficacy trials comparable to those conducted for pharmacological interventions [<xref ref-type="bibr" rid="ref6">6</xref>]. This funding asymmetry does not reflect a lack of scientific interest or clinical signal, but rather the structural economics of nutrition research [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>Concurrently, a substantial population is adopting TCR-based dietary approaches outside formal clinical settings [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], informed by credentialed expert content disseminated through YouTube. Over the past decade, a distinct health communication phenomenon has emerged [<xref ref-type="bibr" rid="ref20">20</xref>]: expert-led channels in which physicians, researchers, and clinicians share evidence-based dietary interventions directly with lay audiences at scale [<xref ref-type="bibr" rid="ref21">21</xref>]. We term this phenomenon healthcasting: the systematic delivery of health education through video platforms by domain experts, bypassing traditional clinical and media gatekeeping structures [<xref ref-type="bibr" rid="ref22">22</xref>]. We adopt this compound term to distinguish the specific phenomenon of expert-led health content creation with bidirectional outcome reporting from broader categories such as health podcasting or medical influencing. In the metabolic health domain, healthcasting channels focused on TCR have accumulated audiences in the millions, with comment engagement growing from a few hundred interactions per year in 2017 to more than 73,000 comments in 2024 across the 11 channels examined in this study. Because TCR interventions are dietary rather than pharmaceutical, they are uniquely amenable to self-directed implementation [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], making this domain one of the most developed examples of research-to-audience healthcasting. The purpose of this paper is to extract and analyze user-reported health outcomes within this specific approach, not to compare TCR with alternative interventions.</p>
        <p>Beneath these videos, many viewers post comments reporting personal health changes, frequently including temporal markers suggesting longitudinal self-monitoring (eg, “after 5 weeks... my fatty liver is reversed”) [<xref ref-type="bibr" rid="ref23">23</xref>]. While each comment is classified independently, the prevalence of temporal language provides indirect evidence that commenters report outcomes observed over weeks to months of dietary change. These comments constitute unsolicited, real-world, naturalistic health outcome data not available in any clinical registry, representing self-reported experiences of individuals who encountered expert content, acted on it, and publicly documented the results [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        <p>Several important caveats apply to the interpretation of self-reported health outcomes extracted from social media commentary. Such data are subject to selection bias [<xref ref-type="bibr" rid="ref25">25</xref>] (individuals who experience positive outcomes may be more likely to comment), survivorship bias (those who discontinued may not return to report), recall bias [<xref ref-type="bibr" rid="ref26">26</xref>], and the absence of clinical verification [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. The data do not constitute clinical evidence in the conventional sense, cannot establish causal relationships between dietary interventions and health outcomes, and should not be interpreted as demonstrating clinical efficacy.</p>
        <p>The purpose of this study is to identify which health conditions users of TCR-focused healthcasting content self-report as improved and to examine factors that may influence the distribution of these reports. The case of metabolic health and TCR was selected because it represents one of the most developed and active domains of healthcasting, with sufficient comment volume and content creator diversity to support computational analysis at scale [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref20">20</xref>].</p>
      </sec>
      <sec>
        <title>Research Gap</title>
        <p>Health information extraction from social media [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>] has focused predominantly on pharmacovigilance and adverse drug event detection [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>], with the Social Media Mining for Health Applications shared tasks expanding from rule-based systems to large language models (LLMs) [<xref ref-type="bibr" rid="ref34">34</xref>]. Research has also examined Reddit mental health communities [<xref ref-type="bibr" rid="ref35">35</xref>] and YouTube health video quality [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], and YouTube video comments on dietary topics have been examined using text mining approaches [<xref ref-type="bibr" rid="ref38">38</xref>]. Systematic mining of YouTube comment sections for self-reported health outcomes, particularly those associated with dietary interventions, has not been addressed. This represents a gap in both health informatics methodology and our understanding of how healthcasting content translates into reported health change at the population level.</p>
        <p>In the methodological literature, existing classification systems have been optimized predominantly for balanced <italic>F</italic><sub>1</sub> performance, with precision typically reported in the 80%-90% range [<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>]. For applications requiring high-confidence corpus generation, where the downstream analysis depends on the validity of every included observation, this precision level is insufficient. A system that incorrectly classifies 1 in 10 or 1 in 5 comments as positive health outcomes would introduce systematic noise into any analysis of outcome distributions, disease-specific prevalence rates, or channel-level variation. The gap this study addresses is therefore twofold: the absence of a domain-specific framework for extracting self-reported health outcomes from healthcasting content, and the absence of a precision-optimized extraction methodology explicitly designed to generate validated corpora for downstream health outcomes research.</p>
      </sec>
      <sec>
        <title>Research Questions</title>
        <p>This study aims to address the following research questions (RQs):</p>
        <list list-type="bullet">
          <list-item>
            <p>RQ1: what is the prevalence of self-reported positive health outcomes in YouTube comments on metabolic health content?</p>
          </list-item>
          <list-item>
            <p>RQ2: what types of health outcomes are most frequently reported, and how are they distributed across subjective, objective, and disease-specific categories?</p>
          </list-item>
          <list-item>
            <p>RQ3: does positive outcome reporting vary significantly across content creators, and what factors may explain this variation?</p>
          </list-item>
          <list-item>
            <p>RQ4: can a precision-optimized rule-based framework achieve sufficient classification accuracy for generating validated health outcome corpora from user-generated content?</p>
          </list-item>
        </list>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>The methodology comprises 3 construction phases, an integrated program of validation studies, and a supplementary contextualization analysis. The construction phases are (1) exploratory data analysis and corpus characterization, (2) ontology development through iterative pattern extraction, and (3) rule-based classification. Phase 3 is then stress-tested through 5 complementary validation studies: precision validation, recall estimation, external validation on held-out channels, interrater reliability assessment, and transformer baseline comparison. A supplementary aspect-based sentiment analysis (ABSA) contextualizes the primary framework’s positive-only design. <xref rid="figure1" ref-type="fig">Figure 1</xref> presents the overall framework architecture.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Architecture of the rule-based natural language processing framework for extracting self-reported positive health outcomes from English-language YouTube comments on metabolic health channels (N=43,111 unique comments, November 2013 to January 2026). The methodology comprises three construction phases: (1) exploratory data analysis, (2) ontology development, and (3) rule-based classification. The classifier is stress-tested through a program of validation studies (precision validation, recall estimation, external validation on held-out channels, interrater reliability, and transformer baseline comparison), and a supplementary analysis provides aspect-based sentiment contextualization of the positive-only design. The precision-optimized classifier uses conservative classification rules and extensive exclusion filtering. BERT: Bidirectional Encoder Representations from Transformers; LDA: latent Dirichlet allocation; LLM: large language model; RO: research objective; NLP: natural language processing; RoBERTa: Robustly Optimized BERT Pretraining Approach.</p>
          </caption>
          <graphic xlink:href="jmir_v28i1e94855_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Research Design Overview</title>
        <p>The framework was explicitly designed to maximize precision rather than recall. This design choice reflects the intended application: generating a high-confidence corpus of verified positive health outcomes suitable for downstream analysis. In health informatics applications, false positives (incorrectly classified outcomes) can lead to erroneous conclusions about treatment efficacy, whereas false negatives (missed outcomes) simply reduce statistical power without introducing systematic error. Following established guidance in clinical text mining [<xref ref-type="bibr" rid="ref42">42</xref>], we prioritized precision because high-confidence annotations were required. This study follows the CREMLS (Consolidated Reporting Guidelines for Prognostic and Diagnostic Machine Learning Modeling Studies) checklist; the completed checklist is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>The exclusive focus on positive health outcomes reflects three considerations: positive outcomes provide the most linguistically distinctive targets, negative outcomes pose fundamentally different classification challenges (heterogeneous expressions requiring distinct approaches), and follower comment sections exhibit a structural positive bias.</p>
        <p>The primary contributions of this work are: (1) a hierarchical ontology of 35 health aspects capturing subjective, objective, and disease-specific outcomes in TCR-focused healthcasting content; (2) a precision-optimized rule-based classification system achieving 97.6% (488/500) precision (95% CI 95.7%-98.6%); (3) a validated corpus of 1747 estimated true positive health outcome reports from 43,111 unique comments across 37,458 unique commenters; (4) comprehensive precision-recall characterization; (5) interrater reliability assessment using dual-model LLM-assisted annotation; (6) a domain-level analysis of healthcasting outcome patterns and channel-level variation; and (7) a supplementary ABSA contextualizing the positive-only extraction scope.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study analyzes publicly available YouTube comments accessed through the official YouTube data application programming interface (API) version 3, in compliance with the platform’s terms of service and API use policies. No formal ethics committee review was sought, consistent with established practice in computational social media research involving publicly accessible data, where no interaction with users occurs, and no intervention is administered [<xref ref-type="bibr" rid="ref43">43</xref>]. Several methodological and procedural safeguards were implemented to protect user privacy and ensure responsible data handling.</p>
        <p>Data collection was limited to publicly posted comments that users understand to be visible to all internet users. No private messages, restricted content, or data requiring authentication were accessed. All processing was automated with no direct interaction with commenters, and no personally identifiable information was retained beyond publicly visible usernames used only for de-duplication. Informed consent was not required, as this study involved secondary analysis of publicly posted data with no direct interaction with or intervention upon users.</p>
        <p>Comment excerpts presented in this study are reproduced only in truncated or paraphrased form to minimize the risk of reidentification through text search. The raw comment corpus is not included in the supplementary materials because YouTube’s terms of service restrict the redistribution of bulk API-retrieved data. The classification code, ontology, and validation protocols are made available to enable methodological reproducibility without compromising user privacy.</p>
        <p>No compensation was provided to any participants, as this study involved secondary analysis of publicly available data and did not involve direct interaction with commenters. No images of identifiable individuals are included in this manuscript. The study was conducted in accordance with the principles of the Declaration of Helsinki applicable to observational research involving publicly available data.</p>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>Comments were collected from 11 YouTube channels producing content on metabolic health and TCR. Channel selection criteria included: (1) medical or scientific credentials of content creators, (2) minimum subscriber threshold of 100,000, (3) focus on metabolic health topics, and (4) active comment sections. The YouTube Data API v3 was used to retrieve the 10 most-commented videos per channel and up to 2000 comments per video, yielding a raw corpus of 209,661 records from 110 videos. After removing duplicate records caused by API pagination (the YouTube Data API v3 returns nonunique results when paginating beyond available comments with relevance-based ordering), 43,111 unique comments were retained. Data collection was performed on January 2, 2026, capturing comments spanning November 7, 2013, to January 2, 2026. <xref ref-type="table" rid="table1">Table 1</xref> presents corpus statistics by channel.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Corpus statistics for 11 metabolic health YouTube channels included in a cross-sectional computational analysis of self-reported health outcomes (N=43,111 unique English-language comments, November 2013 to January 2026).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="170"/>
            <col width="100"/>
            <col width="130"/>
            <col width="130"/>
            <col width="110"/>
            <col width="130"/>
            <col width="70"/>
            <thead>
              <tr valign="bottom">
                <td>Channel<sup>a</sup></td>
                <td>Background<sup>b</sup></td>
                <td>Channel age (years)</td>
                <td>Subscribers<sup>c</sup> (thousands)</td>
                <td>Views (millions)</td>
                <td>Comments<sup>d</sup></td>
                <td>Positive outcomes</td>
                <td>Rate (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>KenDBerryMD</td>
                <td>Family medicine, MD (University of Tennessee)</td>
                <td>14.8</td>
                <td>3660</td>
                <td>533</td>
                <td>3970</td>
                <td>413</td>
                <td>10.40</td>
              </tr>
              <tr valign="top">
                <td>Eric Berg DC</td>
                <td>Chiropractor, DC (Palmer College)</td>
                <td>16.7</td>
                <td>14,500</td>
                <td>3350</td>
                <td>3993</td>
                <td>282</td>
                <td>7.06</td>
              </tr>
              <tr valign="top">
                <td>Eric Westman</td>
                <td>Internal and obesity medicine, MD, PhD (University of Wisconsin), MHS (Duke)</td>
                <td>10.1</td>
                <td>297</td>
                <td>36</td>
                <td>3978</td>
                <td>250</td>
                <td>6.28</td>
              </tr>
              <tr valign="top">
                <td>Jason Fung</td>
                <td>Nephrology, MD (University of Toronto)</td>
                <td>14.2</td>
                <td>1410</td>
                <td>86</td>
                <td>3963</td>
                <td>286</td>
                <td>7.22</td>
              </tr>
              <tr valign="top">
                <td>Ben Bikman</td>
                <td>Cell biology and physiology, PhD (East Carolina University)</td>
                <td>9.0</td>
                <td>191</td>
                <td>9</td>
                <td>3476</td>
                <td>65</td>
                <td>1.87</td>
              </tr>
              <tr valign="top">
                <td>Nick Norwitz</td>
                <td>Metabolism, PhD (Oxford), MD (Harvard)</td>
                <td>11.8</td>
                <td>854</td>
                <td>55</td>
                <td>3954</td>
                <td>82</td>
                <td>2.07</td>
              </tr>
              <tr valign="top">
                <td>Anthony Chaffee MD</td>
                <td>Neurosurgery, MD (Royal College of Surgeons)</td>
                <td>11.3</td>
                <td>536</td>
                <td>109</td>
                <td>3972</td>
                <td>104</td>
                <td>2.62</td>
              </tr>
              <tr valign="top">
                <td>Shawn Baker MD</td>
                <td>Orthopedic surgery, MD (Texas Tech)</td>
                <td>11.8</td>
                <td>383</td>
                <td>83</td>
                <td>3933</td>
                <td>52</td>
                <td>1.32</td>
              </tr>
              <tr valign="top">
                <td>Dr. Robert Cywes</td>
                <td>Bariatric and pediatric surgery, MD, PhD (University of Cape Town)</td>
                <td>6.1</td>
                <td>307</td>
                <td>37</td>
                <td>3932</td>
                <td>102</td>
                <td>2.59</td>
              </tr>
              <tr valign="top">
                <td>Dr. Boz</td>
                <td>Internal medicine, MD (University of South Dakota)</td>
                <td>13.7</td>
                <td>1180</td>
                <td>184</td>
                <td>3959</td>
                <td>86</td>
                <td>2.17</td>
              </tr>
              <tr valign="top">
                <td>Mark Hyman</td>
                <td>Functional medicine, MD (University of Ottawa)</td>
                <td>18.5</td>
                <td>1490</td>
                <td>144</td>
                <td>3980</td>
                <td>68</td>
                <td>1.71</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Channels ordered by positive outcome rate.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>The background section lists the primary professional credentials and degree-granting institution for each content creator.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Subscriber counts and total channel views were updated on March 4, 2026, to reflect current values. YouTube reports subscriber counts rounded to three significant figures.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>Comment data were collected via the YouTube data application programming interface, version 3, on January 2, 2026 (10 most-commented videos per channel, up to 2000 comments per video).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Video selection was maximized for content relevance and comment volume: for each channel, the 10 most-commented videos were identified using the YouTube data API. Content validity was addressed through channel credential requirements, the engagement-based selection criterion, and the classification framework’s exclusion filters. Critically, the unit of analysis is the viewer comment, not the video content itself; consequently, the classification framework’s accuracy is independent of video content quality, and no formal content quality instrument was applied to the videos. A listing of all 110 video titles, URLs, and metadata is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. Video durations ranged from 19 seconds to 115.8 minutes (mean 23.3, SD 24.4 minutes). Short-form videos (n=13, 11.8%) were included because they met the selection criterion and contained comparable health-related discourse.</p>
      </sec>
      <sec>
        <title>Phase 1: Exploratory Data Analysis</title>
        <p>Initial corpus exploration applied latent Dirichlet allocation topic modeling to identify thematic structures. N-gram analysis extracted frequently occurring bi-grams and tri-grams associated with health outcomes. Representative comments were sampled from each topic cluster to inform ontology development. This phase established the linguistic patterns characterizing health outcome reports in the corpus, distinguishing personal testimonials from general health discussions.</p>
        <p>Corpus characterization revealed a right-skewed distribution of comment lengths (mean 32.5, SD 47 words; median 18, IQR 11-43 words), with engagement metrics confirming that most comments function as unsolicited declarations rather than conversational exchanges. Temporal analysis revealed exponential growth, rising from 3408 comments in 2019 to 73,207 in 2024.</p>
      </sec>
      <sec>
        <title>Phase 2: Ontology Development</title>
        <p>The health outcome ontology was developed through an iterative, corpus-driven process in which data-derived linguistic patterns were combined with domain expert knowledge. Starting from latent Dirichlet allocation topic modeling and n-gram extraction conducted in Phase 1, candidate health concepts were identified from the most frequently occurring bi-grams and tri-grams co-occurring with outcome-indicative language (eg, “lost weight,” “blood sugar normalized,” and “pain is gone”). These candidates were then organized into a hierarchical structure of research objectives (ROs), each containing a set of thematically coherent aspects.</p>
        <p>Three ROs were defined: RO1 captures subjective experiences (how users feel day-to-day), RO2 captures measurable biomarkers and anthropometric changes, and RO3 captures disease-level resolution (named conditions improved or reversed). The tiers are complementary and not mutually exclusive, with 50.3% (n=3355) of 6674 reported positive outcomes spanning more than one RO.</p>
        <p>Each aspect was defined with a unique identifier (eg, RO2.1), scope definition, inclusion keywords matched using whole-word regular expressions (case-insensitive), and exclusion patterns redirecting ambiguous matches to more specific aspects. The complete ontology comprises 35 aspects across 3 ROs, totaling 520 keywords. <xref ref-type="table" rid="table2">Table 2</xref> presents the full structure; the complete keyword set is available in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Health outcome ontology used in a rule-based natural language processing framework for extracting self-reported outcomes from English-language YouTube comments on metabolic health channels (N=43,111 unique comments, November 2013 to January 2026). The ontology comprises 35 aspects organized under three research objectives with 520 total keywords. Representative keywords are shown; the complete keyword set is available in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="140"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="250"/>
            <col width="0"/>
            <col width="440"/>
            <thead>
              <tr valign="bottom">
                <td colspan="3">ID</td>
                <td colspan="2">Aspect name<sup>a</sup></td>
                <td colspan="2">Domain; type</td>
                <td>Scope and representative keywords</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>RO1: Subjective well-being (9 aspects):</bold>
                  <bold>self-reported improvements in quality of life, symptoms, and subjective health status</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.1</td>
                <td colspan="2">Cognitive function</td>
                <td colspan="2">Subjective; neurological</td>
                <td colspan="2">Brain fog, mental clarity, memory, focus, concentration, cognitive improvement</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.2</td>
                <td colspan="2">Energy and vitality</td>
                <td colspan="2">Subjective; metabolic</td>
                <td colspan="2">Energy levels, fatigue, tiredness, lethargy, stamina, vitality, no longer tired</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.3</td>
                <td colspan="2">Psychological well-being</td>
                <td colspan="2">Subjective; mental health</td>
                <td colspan="2">Anxiety, depression, mood, stress, mental health, happiness, calm, irritability</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.4</td>
                <td colspan="2">Sleep quality</td>
                <td colspan="2">Subjective; circadian</td>
                <td colspan="2">Sleep improvement, insomnia, sleep apnea, waking rested, deep sleep, better sleep</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.5</td>
                <td colspan="2">Appetite and satiety</td>
                <td colspan="2">Subjective; metabolic</td>
                <td colspan="2">Hunger, cravings, satiety, appetite control, sugar cravings gone, no longer hungry</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.6</td>
                <td colspan="2">Pain and inflammation</td>
                <td colspan="2">Subjective; musculoskeletal</td>
                <td colspan="2">Pain, chronic pain, back pain, joint pain, headache, migraine, swelling, stiffness</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.7</td>
                <td colspan="2">Digestive health</td>
                <td colspan="2">Subjective; gastrointestinal</td>
                <td colspan="2">Bloating, IBS<sup>b</sup>, constipation, acid reflux, gut health, digestion improved, heartburn</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.8</td>
                <td colspan="2">Skin health</td>
                <td colspan="2">Subjective; dermatological</td>
                <td colspan="2">Acne, eczema, psoriasis, rash, skin cleared, skin tags, dermatitis, rosacea</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO1.9</td>
                <td colspan="2">Hormonal and menstrual health</td>
                <td colspan="2">Subjective; endocrine</td>
                <td colspan="2">Hormonal symptoms, menstrual cycle, PMS<sup>c</sup>, perimenopause, hot flushes, libido</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>RO2: Tool-mediated validation (8 aspects): outcomes verified through measurement tools, clinical tests, or quantification</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO2.1</td>
                <td colspan="2">Anthropometric changes</td>
                <td colspan="2">Objective; measured</td>
                <td colspan="2">Weight, pounds, kg, lbs lost, waist, BMI, body fat, dress size, inches, visceral fat</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO2.2</td>
                <td colspan="2">Glycemic control</td>
                <td colspan="2">Objective; lab biomarker</td>
                <td colspan="2">Blood sugar, A<sub>1C</sub>, HbA<sub>1c</sub><sup>d</sup>, fasting glucose, fasting insulin, CGM<sup>e</sup>, glucometer reading</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO2.3</td>
                <td colspan="2">Blood pressure</td>
                <td colspan="2">Objective; measured</td>
                <td colspan="2">Blood pressure, systolic, diastolic, BP normalized, hypertension controlled, mmHg</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO2.4</td>
                <td colspan="2">Lipid profile</td>
                <td colspan="2">Objective; lab biomarker</td>
                <td colspan="2">Cholesterol, triglycerides, HDL<sup>f</sup>, LDL<sup>g</sup>, lipid panel, cholesterol improved, statins off</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO2.5</td>
                <td colspan="2">Inflammatory markers</td>
                <td colspan="2">Objective; lab biomarker</td>
                <td colspan="2">CRP, C-reactive protein, inflammatory markers, ESR<sup>h</sup>, inflammation markers reduced</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO2.6</td>
                <td colspan="2">Liver function</td>
                <td colspan="2">Objective; lab biomarker</td>
                <td colspan="2">ALT<sup>i</sup>, AST<sup>j</sup>, liver enzymes, liver function test, fatty liver markers, liver normalized</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO2.7</td>
                <td colspan="2">Kidney function</td>
                <td colspan="2">Objective; lab biomarker</td>
                <td colspan="2">Creatinine, GFR<sup>k</sup>, eGFR<sup>l</sup> improved, kidney function tests, creatinine normalized</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO2.8</td>
                <td colspan="2">Hormonal markers</td>
                <td colspan="2">Objective; lab biomarker</td>
                <td colspan="2">Testosterone, estrogen, thyroid (TSH/T3/T4), cortisol, hormonal lab values improved</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>RO3: Disease specificity (18 aspects): reported improvements in named medical conditions</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.1</td>
                <td colspan="2">Type 2 diabetes</td>
                <td colspan="2">Disease; metabolic</td>
                <td colspan="2">Diabetes, diabetic, T2D, prediabetes, reversed diabetes, off metformin, off insulin</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.2</td>
                <td colspan="2">Fatty liver disease</td>
                <td colspan="2">Disease; hepatic</td>
                <td colspan="2">Fatty liver, NAFLD<sup>m</sup>, NASH<sup>n</sup>, liver disease, fatty liver reversed, hepatic steatosis</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.3</td>
                <td colspan="2">Cardiovascular disease</td>
                <td colspan="2">Disease; cardiac</td>
                <td colspan="2">Heart disease, heart failure, coronary artery disease, heart attack, cardiovascular</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.4</td>
                <td colspan="2">Hypertension</td>
                <td colspan="2">Disease; cardiovascular</td>
                <td colspan="2">Hypertension, high blood pressure, off blood pressure medication, BP controlled</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.5</td>
                <td colspan="2">PCOS</td>
                <td colspan="2">Disease; endocrine</td>
                <td colspan="2">PCOS, polycystic ovary syndrome, polycystic ovaries, PCOS symptoms improved</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.6</td>
                <td colspan="2">Neurodegenerative disease</td>
                <td colspan="2">Disease; neurological</td>
                <td colspan="2">Alzheimer, dementia, Parkinson, neurodegeneration, cognitive decline reversed</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.7</td>
                <td colspan="2">Chronic kidney disease</td>
                <td colspan="2">Disease; renal</td>
                <td colspan="2">Kidney disease, CKD, chronic kidney disease, kidney failure, renal function improved</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.8</td>
                <td colspan="2">Gout</td>
                <td colspan="2">Disease; metabolic</td>
                <td colspan="2">Gout, uric acid, gout attack gone, no more gout, uric acid normalized</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.9</td>
                <td colspan="2">Cancer</td>
                <td colspan="2">Disease; oncological</td>
                <td colspan="2">Cancer, tumor, remission, cancer markers, prostate cancer, cancer improved</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.10</td>
                <td colspan="2">Osteoporosis</td>
                <td colspan="2">Disease; musculoskeletal</td>
                <td colspan="2">Osteoporosis, bone density, bone loss, osteopenia, DEXA scan improved</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.11</td>
                <td colspan="2">Stroke</td>
                <td colspan="2">Disease; cerebrovascular</td>
                <td colspan="2">Stroke, TIA, mini stroke, stroke recovery, stroke risk reduced</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.12</td>
                <td colspan="2">ADHD</td>
                <td colspan="2">Disease; neurodevelopmental</td>
                <td colspan="2">ADHD, attention deficit, ADD, hyperactivity, ADHD symptoms improved, focus</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.13</td>
                <td colspan="2">Thyroid disease</td>
                <td colspan="2">Disease; endocrine</td>
                <td colspan="2">Thyroid, hypothyroid, hyperthyroid, Hashimoto’s, thyroid medication reduced</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.14</td>
                <td colspan="2">Inflammatory bowel disease</td>
                <td colspan="2">Disease; gastrointestinal</td>
                <td colspan="2">Crohn, ulcerative colitis, IBD, Crohn in remission, colitis improved</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.15</td>
                <td colspan="2">Autoimmune disease</td>
                <td colspan="2">Disease; immunological</td>
                <td colspan="2">Autoimmune, lupus, multiple sclerosis, rheumatoid arthritis, celiac, autoimmune improved</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.16</td>
                <td colspan="2">Fibromyalgia and neuropathy</td>
                <td colspan="2">Disease; neurological</td>
                <td colspan="2">Fibromyalgia, neuropathy, nerve pain, peripheral neuropathy, numbness, tingling gone</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.17</td>
                <td colspan="2">Arthritis</td>
                <td colspan="2">Disease; musculoskeletal</td>
                <td colspan="2">Arthritis, osteoarthritis, arthritic, rheumatoid, joint disease, arthritis improved</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RO3.18</td>
                <td colspan="2">Gallbladder disease</td>
                <td colspan="2">Disease; biliary</td>
                <td colspan="2">Gallbladder, gallstones, cholecystectomy, gallbladder attack, gallstones resolved</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Exclusion patterns redirect keyword matches to more specific aspects to prevent double-counting (eg, “arthritis” in a pain context is classified under RO3.17, not RO1.6). All keywords are matched case-insensitively with whole-word boundary constraints. Complete keyword sets and exclusion patterns are available in the supplementary materials repository.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>IBS: irritable bowel syndrome.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>PMS: premenstrual syndrome.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>HbA<sub>1c</sub>: hemoglobin A<sub>1c</sub>.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>CGM: continuous glucose monitor.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>HDL: high-density lipoprotein.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>LDL: low-density lipoprotein.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>ESR: erythrocyte sedimentation rate.</p>
            </fn>
            <fn id="table2fn9">
              <p><sup>i</sup>ALT: alanine aminotransferase.</p>
            </fn>
            <fn id="table2fn10">
              <p><sup>j</sup>AST: aspartate aminotransferase.</p>
            </fn>
            <fn id="table2fn11">
              <p><sup>k</sup>GFR: glomerular filtration rate.</p>
            </fn>
            <fn id="table2fn12">
              <p><sup>l</sup>eGFR: estimated glomerular filtration rate.</p>
            </fn>
            <fn id="table2fn13">
              <p><sup>m</sup>NAFLD: nonalcoholic fatty liver disease.</p>
            </fn>
            <fn id="table2fn14">
              <p><sup>n</sup>NASH: nonalcoholic steatohepatitis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Ontology Validation and Refinement</title>
        <p>The ontology underwent 2 validation rounds (coverage testing and precision refinement) before deployment. A final manual review of 20 randomly sampled matches per aspect confirmed semantic validity before the ontology was locked for Phase 3 application.</p>
        <p>Final ontology coverage showed that 30.1% (12,976/43,111) of comments contained at least one health-relevant keyword match; overall, 4.15% (1790/43,111) of all comments met all criteria for a definite, first-person, positive health outcome report.</p>
      </sec>
      <sec>
        <title>Phase 3: Classification Framework</title>
        <sec>
          <title>Overview</title>
          <p>The classification system implements a 3-stage pipeline designed to maximize precision. Algorithm 1 in <xref rid="figure2" ref-type="fig">Figure 2</xref> presents the formal classification procedure.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Algorithm 1: precision-optimized health outcome classification.</p>
            </caption>
            <graphic xlink:href="jmir_v28i1e94855_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Stage 1: Health Content Detection</title>
          <p>Comments are filtered for health-related content using keyword matching against the ontology vocabulary.</p>
        </sec>
        <sec>
          <title>Stage 2: Outcome Indicator Detection</title>
          <p>Health-related comments are analyzed for positive outcome indicators, including quantified changes (eg, “lost 30 pounds”), symptom cessation (eg, “pain gone”), explicit improvement language (eg, “reversed my diabetes”), and temporal improvement markers (eg, “no longer need medication”).</p>
        </sec>
        <sec>
          <title>Stage 3: Exclusion Filtering</title>
          <p>Candidate positives are filtered through exclusion patterns removing: (1) questions rather than statements, (2) third-party reports, (3) hypothetical or aspirational language, (4) negated outcomes, and (5) general health advice. This conservative approach implements the precision-optimized design philosophy.</p>
          <p>Handling of ambiguous statements: Sarcasm is not specifically detected but is unlikely to pass all 3 stages due to the requirement for co-occurring health vocabulary, outcome indicators, and absence of exclusion patterns. Implied improvements without explicit outcome language are intentionally excluded. Narrative sequences mixing positive and negative outcomes may partially pass but are addressed by negation filters. The validation results confirm that such cases account for a minority of false positives (complete classification rule patterns are provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p>
        </sec>
      </sec>
      <sec>
        <title>Validation Studies</title>
        <sec>
          <title>Overview</title>
          <p>The classification framework was stress-tested through 5 complementary validation studies, each probing a different dimension of performance: internal precision on the development corpus, recall and coverage through weighted negative sampling, out-of-sample generalizability on held-out channels, rater agreement via LLM-assisted interrater reliability assessment, and head-to-head comparison against fine-tuned transformer baselines. Each study is described in turn below; full protocols and results for the external validation and transformer baseline are reported in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendices 5</xref> and <xref ref-type="supplementary-material" rid="app7">7</xref>, respectively.</p>
        </sec>
        <sec>
          <title>Precision Validation</title>
          <p>Classification precision was validated through manual coding of a stratified random sample. Sample size was calculated for 95% confidence level with 4% margin of error, yielding n=500 samples stratified by RO. Each sample was coded on five dimensions: (1) is_positive_outcome (yes, no, or unclear), (2) is_personal (yes or no), (3) is_definite (yes or no), (4) aspect_correct (yes, partial, or no), and (5) free-text notes. Wilson score [<xref ref-type="bibr" rid="ref44">44</xref>] confidence intervals were computed for all proportions.</p>
        </sec>
        <sec>
          <title>Recall Estimation</title>
          <p>Recall was estimated through stratified negative sampling. A stratified random sample of n=510 comments from the nonclassified pool (41,321 comments not identified as positive outcomes) was drawn using disproportionate allocation across 3 comment-length strata (short: fewer than 50 words, n=300; medium: 50 to 150 words, n=150; long: more than 150 words, n=60) with proportional allocation across the 11 channels within each stratum. Each sampled comment was manually reviewed by the first author to identify false negatives: true positive outcomes missed by the classification system. Sampling weights reflecting the population proportion in each length stratum were applied to compute unbiased weighted false-negative rates, which were then extrapolated to estimate total missed positives and to calculate recall. Wilson score CIs were used throughout.</p>
        </sec>
        <sec>
          <title>External Validation on Held-Out Channels</title>
          <p>To address the concern that the 35-aspect ontology was iteratively refined on the same corpus used for classification, and thereby to test generalizability beyond the development channels, external validation was conducted on 12,653 comments collected from 5 independent YouTube channels with zero overlap with the development corpus. The 5 held-out channels (Georgia Ede MD, Robert Kiltz MD, Sten Ekberg DC, Chris Palmer MD, and Ted Naiman MD) were selected by an independent co-author based on topical relevance to ketogenic, carnivore, and broader metabolic health dietary content. For each channel, comments were collected from the 10 most-commented videos using the YouTube data API, following the same protocol as the development corpus. The classifier was applied without modification. All comments classified as positive health outcomes were exhaustively verified through manual coding to determine precision (census approach). For recall estimation, a stratified random sample of nonpositive comments (up to 100 per channel; random seed 42) was manually coded to identify false negatives, with channel-level false-negative rates extrapolated to the full negative population, and 95% Wilson score CIs reported. The full external validation protocol and results are presented in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p>
        </sec>
        <sec>
          <title>Interrater Reliability Assessment</title>
          <p>To address the limitation of single-coder validation, we applied an LLM-assisted annotation validation protocol with 2 independent LLM coders (GPT-4o and GPT-4.1; OpenAI) as second annotators [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. This approach, increasingly adopted in computational linguistics and health informatics, provides a systematic assessment of interrater reliability while maintaining full reproducibility.</p>
          <p>From the 500-sample precision validation set, 28 exemplars were selected through purposive stratified sampling to serve as few-shot coding examples. The stratification covered 6 coding outcome categories: clear true positives (n=10), clear negatives or false positives (n=5), unclear or ambiguous cases (n=5), positive but not personal outcomes (n=3), positive but not definite outcomes (n=2), and aspect assignment issues (n=3). This selection ensured representation of all 3 ROs, 10 of 11 channels, all 6 outcome categories, and deliberate overrepresentation of minority classes and boundary cases, which is standard practice in few-shot prompt design.</p>
          <p>Both models received identical structured prompts containing: (1) the coding task definition with detailed guidelines for all four coding dimensions, (2) the complete 35-aspect ontology reference, (3) all 28 few-shot exemplars with the researcher’s ground-truth codings, and (4) the comment text with automated classification details but without the researcher’s manual codings (full prompt provided in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>). Processing used temperature 0.0 (deterministic output) in batches of 10 comments. Agreement was computed on the remaining 472 test samples (exemplars excluded to prevent circular validation).</p>
          <p>To mitigate anchoring effects, exemplars spanned 6 coding outcome categories (including clear negatives and errors), and the prompt instructed models to code independently based on the comment text, explicitly stating that the automated classification may be incorrect.</p>
          <p>Cohen κ [<xref ref-type="bibr" rid="ref47">47</xref>] was computed for each coding dimension across 3 comparison pairs (human vs GPT-4o, human vs GPT-4.1, and GPT-4o vs GPT-4.1). Because the validation set’s 90.4% positive class prevalence creates a κ paradox [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>], we report the prevalence index and bias index alongside each κ value to decompose the paradox. Raw percent agreement and Cohen κ are the primary reliability metrics, interpreted using the Landis and Koch [<xref ref-type="bibr" rid="ref47">47</xref>] framework.</p>
        </sec>
        <sec>
          <title>Transformer Baseline Comparison</title>
          <p>To test whether the precision advantage of the rule-based framework comes at an unnecessary cost to recall compared with learned representations trained on the same data, a head-to-head baseline comparison was conducted against two pretrained transformer models: Bidirectional Encoder Representations from Transformers (BERT)-base-uncased and Robustly Optimized BERT Pretraining Approach (RoBERTa)-base (Hugging Face). Both models were fine-tuned on the combined precision-validation and recall-expansion datasets (n=836 unique manually-coded comments; 347 positive and 489 negative) using stratified 5-fold cross-validation. Standard pretrained weights and default fine-tuning hyperparameters were used; no hyperparameter search was performed, because the purpose of this analysis was a fair comparison against a reasonable learned baseline rather than an optimized benchmark. Performance was evaluated on 4 metrics (precision, recall, <italic>F</italic><sub>1</sub>-score, and receiver operating characteristic-area under the curve) averaged across folds, and, separately, on the 326-comment precision-validation subset and on the 27 false negatives identified through the recall-expansion sample. The rule-based framework was compared against both transformer baselines on each metric. The full experimental protocol, hyperparameters, and per-fold results are presented in <xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>.</p>
        </sec>
      </sec>
      <sec>
        <title>Supplementary Analysis: Aspect-Based Sentiment Contextualization</title>
        <p>This supplementary analysis is reported here, rather than as a coequal phase of the primary methodology, for 2 reasons. First, it uses a methodologically distinct procedure (LLM consensus coding) that cannot be directly compared to the validation regime applied to the rule-based framework. Second, its purpose is to explore and contextualize the positive-only design of the primary framework, not to estimate the prevalence of negative outcomes in the underlying population. Claims derived from this analysis are therefore treated as indicative rather than confirmatory throughout the Discussion section.</p>
        <p>Because the classification framework extracts only positive health outcomes (as outlined in the Research Design Overview section), a supplementary analysis was conducted to contextualize this scope decision by characterizing the broader sentiment landscape of the corpus. ABSA was used to quantify the distribution of positive, negative, neutral, and mixed health sentiment across comments, providing an empirical basis for evaluating whether the positive-only focus omits a substantial volume of negative health experiences.</p>
        <p>ABSA extends document-level sentiment analysis by identifying specific aspects (topics or entities) within a text and assigning sentiment to each aspect independently [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. This granularity is essential for health-related comments, which frequently contain mixed sentiment; for example, a single comment may report weight loss improvement alongside gastrointestinal discomfort.</p>
        <p>A stratified random sample of 1000 comments was drawn from the full corpus, proportional to channel contribution. Two independent LLMs (GPT-4o and GPT-4.1) were prompted to perform ABSA on each comment, classifying it as health-related or nonhealth-related and, for health-related comments, identifying health aspects and assigning aspect-level sentiment (positive, negative, neutral, or mixed). The dual-model design serves as a form of interrater reliability assessment: only comments in which both models agree on health-relatedness and sentiment classification are included in the consensus analysis, yielding conservative yet high-confidence sentiment estimates. The complete ABSA prompt, including the task definition, coding guidelines, and a few-shot exemplar, is provided in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Classification Performance</title>
        <p>The framework classified 1790 comments (1790/43,111, 4.15% of the corpus) as containing definite positive health outcomes. <xref ref-type="table" rid="table3">Table 3</xref> presents the complete validation metrics.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Classification validation results for a rule-based natural language processing framework applied to English-language YouTube comments on 11 metabolic health channels (N=43,111 unique comments, November 2013 to January 2026). Precision validated on n=500 stratified random samples; recall estimated from n=510 stratified negative samples. Wilson score 95% CIs.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="460"/>
            <col width="230"/>
            <col width="310"/>
            <thead>
              <tr valign="top">
                <td>Metric</td>
                <td>Value</td>
                <td>95% CI (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Precision, % (n=500)</td>
                <td>97.6</td>
                <td>95.7-98.6</td>
              </tr>
              <tr valign="top">
                <td>Recall, % (n=510)</td>
                <td>56.2</td>
                <td>43.4-67.9</td>
              </tr>
              <tr valign="top">
                <td><italic>F</italic><sub>1</sub>-score, %</td>
                <td>71.3</td>
                <td>—<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>True positives (validated)</td>
                <td>488/500</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>False positives</td>
                <td>11/500</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>False negatives (in sample)</td>
                <td>27/510</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>First-person testimony rate, %</td>
                <td>97</td>
                <td>95.2-98.1</td>
              </tr>
              <tr valign="top">
                <td>Definite outcome rate, %</td>
                <td>88.4</td>
                <td>85.3-90.9</td>
              </tr>
              <tr valign="top">
                <td>Aspect assignment accuracy (strict), %</td>
                <td>90.8</td>
                <td>88-93</td>
              </tr>
              <tr valign="top">
                <td>Aspect assignment accuracy (lenient), %</td>
                <td>97</td>
                <td>95.2-98.1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The expanded recall estimation (n=510 stratified sample) identified 27 false negatives (5.3% raw rate; 3.29% weighted rate), yielding an estimated recall of 56.2% (95% CI 43.4%-67.9%) when extrapolated to the full nonpositive pool. False negatives varied across channels (χ²<sub>10</sub>=28.8; <italic>P</italic>=.001) and comment length strata (χ²<sub>2</sub>=19.4; <italic>P</italic>&#60;.001), with KenDBerryMD (16.7%) and Eric Berg DC (11.4%) showing the highest channel rates and medium-length (10%) and long (11.7%) comments generating more false negatives than short comments (1.7%). The dominant miss reason was structural pattern mismatch (23 of 27, 85%), indicating that the classifier keyword dictionary is adequate, but its syntactic pattern set does not capture all expression forms. Applying precision estimates to the classified corpus yields 1747 estimated true-positive health outcome reports (95% CI 1713-1764).</p>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>To characterize the framework’s failure modes, we examined all false positives from the precision validation (n=11 across 9 unique comments) and all false negatives from the recall estimation (n=27). Three systematic categories of false positive errors were identified: third-party outcome references (4 of 11 cases, 36%), where comments described health improvements experienced by family members or acquaintances rather than the commenter; negative overall trajectory contexts (4 of 11, 36%); and nonspecific or advice-based language (3 of 11, 27%).</p>
        <p>Among false positives, the most common patterns were positive signals embedded in negative overall trajectories and third-party reports (each 4/11, 36%), followed by nonspecific or advice-based language (3/11, 27%). The negative-trajectory cases in particular require discourse-level sentiment analysis beyond the current sentence-level pattern matching.</p>
        <p>False negative analysis revealed structural pattern mismatch as the dominant miss mechanism (23/27, 85%), indicating that the keyword dictionary is adequate, but syntactic patterns do not capture all expression forms. The most frequently missed aspects were RO2.1 (general well-being, 17/27), RO1.8 (energy, 6/27), and RO1.2 (body composition, 5/27). Targeted syntactic rule expansion, rather than vocabulary expansion, represents the primary pathway to improved recall (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Error analysis for a rule-based natural language processing framework classifying self-reported health outcomes in English-language YouTube comments on metabolic health channels (N=43,111 unique comments, November 2013 to January 2026). False positives (n=11) came from precision validation of 500 samples; false negatives (n=27) came from recall estimation of 510 samples.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="310"/>
            <col width="180"/>
            <col width="410"/>
            <thead>
              <tr valign="top">
                <td>Type</td>
                <td>Category</td>
                <td>Errors, n (%)</td>
                <td>Suggested remedy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>FP<sup>a</sup></td>
                <td>Third-party references</td>
                <td>4 (36)</td>
                <td>Expand person-reference filters</td>
              </tr>
              <tr valign="top">
                <td>FP</td>
                <td>Negative trajectory context</td>
                <td>4 (36)</td>
                <td>Discourse-level sentiment analysis</td>
              </tr>
              <tr valign="top">
                <td>FP</td>
                <td>Nonspecific or advice language</td>
                <td>3 (27)</td>
                <td>Tighter personal experience requirements</td>
              </tr>
              <tr valign="top">
                <td>FN<sup>b</sup></td>
                <td>Borderline or debatable positives</td>
                <td>7 (26)</td>
                <td>Broader outcome definitions (recall trade-off)</td>
              </tr>
              <tr valign="top">
                <td>FN</td>
                <td>Colloquial symptom language</td>
                <td>10 (37)</td>
                <td>Vocabulary expansion</td>
              </tr>
              <tr valign="top">
                <td>FN</td>
                <td>Implicit emotional language</td>
                <td>8 (30)</td>
                <td>ML<sup>c</sup>-based semantic classification</td>
              </tr>
              <tr valign="top">
                <td>FN</td>
                <td>Missed pattern coverage</td>
                <td>2 (7)</td>
                <td>Rule refinement</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>FP: false positive; n=11.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>FN: false negative; n=27.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>ML: machine learning.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>External validation was conducted on 12,653 comments from 5 YouTube channels not included in the development corpus, selected by an independent co-author. The classifier achieved 93.4% precision (227/243; 95% CI 89.6%-95.9%) on the external corpus, with CIs overlapping those from the development corpus (97.6%), confirming generalizability within the metabolic health domain. Recall was estimated at 50.1% (95% CI 31.4%-59.1%), consistent with the development corpus. Full external validation protocol and results are presented in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p>
      </sec>
      <sec>
        <title>Interrater Reliability</title>
        <p><xref ref-type="table" rid="table5">Table 5</xref> presents the interrater reliability results across 4 coding dimensions and 3 comparison pairs, with raw Cohen κ and percentage agreement as primary metrics.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Interrater reliability for validation of a rule-based natural language processing framework classifying self-reported health outcomes in English-language YouTube comments on 11 metabolic health channels (N=43,111 unique comments, November 2013 to January 2026). Reliability was assessed across 4 coding dimensions and 3 comparison pairs. Cohen κ and percent agreement are reported as primary metrics. Prevalence index (PI) [<xref ref-type="bibr" rid="ref49">49</xref>] and bias index (BI) [<xref ref-type="bibr" rid="ref52">52</xref>] quantify the κ paradox components; κ interpretation follows Landis and Koch [<xref ref-type="bibr" rid="ref47">47</xref>]. The validation set was drawn from classifier-positive comments, producing approximately 90% positive prevalence that depresses κ values via the κ paradox [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>].</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="170"/>
            <col width="230"/>
            <col width="140"/>
            <col width="130"/>
            <col width="160"/>
            <col width="90"/>
            <col width="80"/>
            <thead>
              <tr valign="bottom">
                <td>Comparison</td>
                <td>Dimension</td>
                <td>Agree, n (%)</td>
                <td>Cohen κ</td>
                <td>Interpretation</td>
                <td>PI<sup>a</sup></td>
                <td>BI<sup>b</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Human vs GPT-4o</td>
                <td>Positive health outcome</td>
                <td>394/471 (83.7)</td>
                <td>0.297</td>
                <td>Fair</td>
                <td>0.892</td>
                <td>0.076</td>
              </tr>
              <tr valign="top">
                <td>Human vs GPT-4o</td>
                <td>First-person testimony</td>
                <td>463/472 (98.1)</td>
                <td>0.658</td>
                <td>Substantial</td>
                <td>0.962</td>
                <td>0.019</td>
              </tr>
              <tr valign="top">
                <td>Human vs GPT-4o</td>
                <td>Definite outcome</td>
                <td>316/472 (66.9)</td>
                <td>0.130</td>
                <td>Slight</td>
                <td>0.788</td>
                <td>0.242</td>
              </tr>
              <tr valign="top">
                <td>Human vs GPT-4o</td>
                <td>Aspect assignment</td>
                <td>289/472 (61.2)</td>
                <td>0.106</td>
                <td>Slight</td>
                <td>0.896</td>
                <td>0.326</td>
              </tr>
              <tr valign="top">
                <td>Human vs GPT-4.1</td>
                <td>Positive health outcome</td>
                <td>410/471 (87)</td>
                <td>0.332</td>
                <td>Fair</td>
                <td>0.892</td>
                <td>0.045</td>
              </tr>
              <tr valign="top">
                <td>Human vs GPT-4.1</td>
                <td>First-person testimony</td>
                <td>468/471 (99.4)</td>
                <td>0.839</td>
                <td>Almost perfect</td>
                <td>0.966</td>
                <td>0.006</td>
              </tr>
              <tr valign="top">
                <td>Human vs GPT-4.1</td>
                <td>Definite outcome</td>
                <td>389/472 (82.4)</td>
                <td>0.205</td>
                <td>Slight</td>
                <td>0.788</td>
                <td>0.040</td>
              </tr>
              <tr valign="top">
                <td>Human vs GPT-4.1</td>
                <td>Aspect assignment</td>
                <td>267/472 (56.6)</td>
                <td>0.082</td>
                <td>Slight</td>
                <td>0.896</td>
                <td>0.373</td>
              </tr>
              <tr valign="top">
                <td>GPT-4o vs GPT-4.1</td>
                <td>Positive health outcome</td>
                <td>445/472 (94.3)</td>
                <td>0.771</td>
                <td>Substantial</td>
                <td>0.767</td>
                <td>0.044</td>
              </tr>
              <tr valign="top">
                <td>GPT-4o vs GPT-4.1</td>
                <td>First-person testimony</td>
                <td>463/472 (98.1)</td>
                <td>0.706</td>
                <td>Substantial</td>
                <td>0.928</td>
                <td>0.013</td>
              </tr>
              <tr valign="top">
                <td>GPT-4o vs GPT-4.1</td>
                <td>Definite outcome</td>
                <td>375/472 (79.4)</td>
                <td>0.476</td>
                <td>Moderate</td>
                <td>0.305</td>
                <td>0.201</td>
              </tr>
              <tr valign="top">
                <td>GPT-4o vs GPT-4.1</td>
                <td>Aspect assignment</td>
                <td>385/472 (81.6)</td>
                <td>0.655</td>
                <td>Substantial</td>
                <td>0.523</td>
                <td>0.072</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>PI: prevalence index.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>BI: bias index.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>On the primary coding dimension (positive health outcome identification), raw percent agreement was high (83.7%-94.3%), but Cohen κ values were lower (0.297-0.771), reflecting the well-documented κ paradox [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]: the validation set’s 90.4% positive prevalence leaves limited headroom for κ above chance. Bias index values (0.044-0.076) confirm that low κ is driven by prevalence rather than systematic rater bias.</p>
        <p>For personal testimony identification, agreement was near-ceiling across all pairs. GPT-4.1 achieved the highest κ observed in this study (κ=0.839, 99.4% agreement), while GPT-4o achieved substantial agreement (κ=0.658, 98.1%). The lower κ for GPT-4o, despite 98.1% agreement, again reflects the prevalence paradox: with 98% of comments coded as first-person testimony, κ is constrained even at very high observed agreement.</p>
        <p>On secondary dimensions (definiteness and aspect correctness), agreement was lower, with systematic directional bias: both LLMs applied stricter evidentiary standards than the human coder (McNemar <italic>P</italic>&#60;.001 and <italic>P</italic>=.048), consistent with measurement bias characterized in the LLM Bias Audit (<xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>).</p>
        <p>The cross-model comparison provides the strongest reliability evidence: two independent architectures achieved substantial agreement on positive outcome identification (κ=0.771, 94.3%), the highest κ for this dimension among the human-vs-LLM and cross-model comparisons. This cross-architecture convergence suggests that the coding task is well-specified: two independent systems, given the same instructions, reach similar conclusions. The cross-model κ is less affected by the prevalence paradox because both models have more balanced marginal distributions than the human-vs-LLM comparisons (PI=0.767 vs 0.892).</p>
      </sec>
      <sec>
        <title>Prevalence and Distribution by Research Objective</title>
        <p>The raw prevalence of classified positive outcomes was 4.15% (1790/43,111). Adjusted for precision, the estimated true positive prevalence is 4.05%. Outcomes were distributed across ROs as shown in <xref ref-type="table" rid="table6">Table 6</xref>. Of the 6674 positive outcomes, 50.3% (n=3355) spanned multiple ROs, indicating that users frequently report improvements across multiple health dimensions simultaneously.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Distribution of positive health outcomes by research objective, extracted from English-language YouTube comments on 11 metabolic health channels using a rule-based natural language processing framework (n=6674 positive outcomes in n=1790 positive reports among N=43,111 unique comments, November 2013 to January 2026). Percentages sum to more than 100% because 50.3% of outcomes span multiple research objectives. Wilson score 95% CIs.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="400"/>
            <col width="480"/>
            <thead>
              <tr valign="top">
                <td>RO<sup>a</sup></td>
                <td>Description</td>
                <td>Positive outcomes (n=6674), n (%; 95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>RO1</td>
                <td>Subjective well-being</td>
                <td>3456 (51.8; 50.6-53)</td>
              </tr>
              <tr valign="top">
                <td>RO2</td>
                <td>Tool-mediated validation</td>
                <td>5350 (80.2; 79.2-81.1)</td>
              </tr>
              <tr valign="top">
                <td>RO3</td>
                <td>Disease specificity</td>
                <td>2032 (30.5; 29.4-31.6)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>RO: research objective.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Health Aspect Analysis</title>
        <p><xref ref-type="table" rid="table7">Table 7</xref> presents the top 10 health aspects by frequency. Anthropometric changes (primarily weight loss) dominated at 73% (4870/6674) of positive outcomes, consistent with the metabolic health focus of the source content. Pain and inflammation reduction (1137/6674, 17%) and type 2 diabetes improvement (977/6674, 14.6%) were the second and third most reported outcomes, suggesting clinically significant health impacts beyond aesthetic weight changes.</p>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Top 10 most frequently reported health aspects among self-reported positive outcomes extracted from English-language YouTube comments on 11 metabolic health channels (n=1790 reports of 6674 positive outcomes among N=43,111 unique comments, November 2013 to January 2026). Percentages computed relative to total positive outcomes. Wilson score 95% CIs.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="54"/>
            <col width="89"/>
            <col width="416"/>
            <col width="441"/>
            <thead>
              <tr valign="top">
                <td>#</td>
                <td>ID</td>
                <td>Aspect</td>
                <td>Positive outcomes, n (%; 95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>RO<sup>a</sup> 2.1</td>
                <td>Anthropometric changes</td>
                <td>4870 (73; 71.9-74.1)</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>RO1.6</td>
                <td>Pain and inflammation</td>
                <td>1137 (17; 16.2-18)</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>RO3.1</td>
                <td>Type 2 diabetes</td>
                <td>977 (14.6; 13.8-15.5)</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>RO1.8</td>
                <td>Skin health</td>
                <td>784 (11.8; 11-12.5)</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>RO1.3</td>
                <td>Psychological well-being</td>
                <td>731 (11; 10.2-11.7)</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>RO1.5</td>
                <td>Appetite and satiety</td>
                <td>677 (10.1; 9.4-10.9)</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>RO1.7</td>
                <td>Digestive health</td>
                <td>664 (10; 9.3-10.7)</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>RO1.2</td>
                <td>Energy and vitality</td>
                <td>651 (9.8; 9.1-10.5)</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>RO2.2</td>
                <td>Glycemic control</td>
                <td>564 (8.5; 7.8-9.1)</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>RO2.3</td>
                <td>Blood pressure</td>
                <td>548 (8.2; 7.6-8.9)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>RO: research objective.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Channel-Level Variation</title>
        <p>Significant variation in positive outcome rates was observed across channels (χ²<sub>10</sub>=927.5; <italic>P</italic>&#60;.001), as shown in <xref ref-type="table" rid="table8">Table 8</xref>. Rates ranged from 1.32% (Shawn Baker MD) to 10.40% (KenDBerryMD), yielding an odds ratio of 8.68 between the highest and lowest channels. Cramér <italic>V</italic>=0.147 indicates a small but statistically significant effect size, suggesting that while channel-level differences exist, they explain a modest proportion of total variance in outcome reporting.</p>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Channel-level variation in positive health outcome rates in a cross-sectional computational analysis of self-reported outcomes from English-language YouTube comments on 11 metabolic health channels (N=43,111 unique comments, November 2013 to January 2026). Channels ordered by descending positive outcome rate. Wilson score 95% CIs.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="560"/>
            <col width="440"/>
            <thead>
              <tr valign="top">
                <td>Channels</td>
                <td>Positive outcomes, n/N (%; 95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>KenDBerryMD</td>
                <td>413/3970 (10.40; 9.49-11.39)</td>
              </tr>
              <tr valign="top">
                <td>Jason Fung</td>
                <td>286/3963 (7.22; 6.45-8.06)</td>
              </tr>
              <tr valign="top">
                <td>Eric Berg DC</td>
                <td>282/3993 (7.06; 6.31-7.90)</td>
              </tr>
              <tr valign="top">
                <td>Eric Westman</td>
                <td>250/3978 (6.28; 5.57-7.08)</td>
              </tr>
              <tr valign="top">
                <td>Anthony Chaffee MD</td>
                <td>104/3972 (2.62; 2.17-3.16)</td>
              </tr>
              <tr valign="top">
                <td>Dr. Robert Cywes MD</td>
                <td>102/3932 (2.59; 2.14-3.14)</td>
              </tr>
              <tr valign="top">
                <td>Dr. Boz</td>
                <td>86/3959 (2.17; 1.76-2.67)</td>
              </tr>
              <tr valign="top">
                <td>Nick Norwitz</td>
                <td>82/3954 (2.07; 1.67-2.57)</td>
              </tr>
              <tr valign="top">
                <td>Ben Bikman</td>
                <td>65/3477 (1.87; 1.47-2.38)</td>
              </tr>
              <tr valign="top">
                <td>Mark Hyman</td>
                <td>68/3980 (1.71; 1.35-2.16)</td>
              </tr>
              <tr valign="top">
                <td>Shawn Baker MD</td>
                <td>52/3933 (1.32; 1.01-1.73)</td>
              </tr>
              <tr valign="top">
                <td>Overall</td>
                <td>1790/43,111 (4.15; 3.96-4.35)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Outcome Category Distribution</title>
        <p>Analysis of outcome indicator types (<xref ref-type="table" rid="table9">Table 9</xref>) revealed that quantified changes (eg, “lost 30 pounds,” “A1C dropped to 5.4”) comprised 74.4% (1331/1790) of positive outcomes. Symptom cessation reports (eg, “joint pain gone”) accounted for 14.5% (259/1790), explicit improvement language (eg, “feel so much better”) for 11.8% (212/1790), and disease reversal or remission claims (eg, “reversed my type 2 diabetes”) for 6.4% (114/1790). Medication discontinuation (eg, “off all medications”) represented 3.9% (69/1790), and temporal improvements (eg, “since starting keto…lost 20 pounds”) represented 2.3% (42/1790) of outcomes reported.</p>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Distribution of outcome indicator categories in a cross-sectional computational analysis of self-reported health outcomes from English-language YouTube comments on 11 metabolic health channels (n=1790 positive outcomes from N=43,111 unique comments, November 2013 to January 2026). Categories are not mutually exclusive; a single comment may contain multiple indicator types.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="520"/>
            <col width="480"/>
            <thead>
              <tr valign="top">
                <td>Outcome category</td>
                <td>Positive outcomes, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Quantified change</td>
                <td>1331 (74.4)</td>
              </tr>
              <tr valign="top">
                <td>Symptom cessation</td>
                <td>259 (14.5)</td>
              </tr>
              <tr valign="top">
                <td>Explicit improvement</td>
                <td>212 (11.8)</td>
              </tr>
              <tr valign="top">
                <td>Reversal or remission</td>
                <td>114 (6.4)</td>
              </tr>
              <tr valign="top">
                <td>Medication discontinuation</td>
                <td>69 (3.9)</td>
              </tr>
              <tr valign="top">
                <td>Temporal improvement</td>
                <td>42 (2.3)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Sentiment Contextualization: ABSA</title>
        <p>A supplementary ABSA was conducted to contextualize the positive-outcome findings within the broader health discourse of the corpus [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>].</p>
        <p>Intermodel agreement on health-related classification was 93.1% (915/983), with sentiment agreement of 87.6% (495/565), indicating acceptable coding consistency for an exploratory contextualization analysis.</p>
        <p><xref ref-type="table" rid="table10">Table 10</xref> presents the consensus sentiment distribution for the subset of health-related comments where both models agreed on sentiment classification. Among 495 consensus-coded health-related comments, positive sentiment accounted for 54.7% (271/495), negative for 11.9% (59/495), neutral for 15.6% (77/495), and mixed for 17.8% (88/495), yielding a positive-to-negative ratio of 4.6:1.</p>
        <table-wrap position="float" id="table10">
          <label>Table 10</label>
          <caption>
            <p>Aspect-based sentiment analysis (ABSA) consensus sentiment distribution among health-related English-language YouTube comments on 11 metabolic health channels (n=495 consensus-coded comments from a stratified sample of 1003 drawn from N=43,111 unique comments, November 2013 to January 2026). Dual-model classification (GPT-4o and GPT-4.1) with consensus defined as agreement on both health-relatedness and sentiment polarity.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="380"/>
            <col width="290"/>
            <thead>
              <tr valign="bottom">
                <td>Sentiment<sup>a</sup></td>
                <td>Comments, n (%)</td>
                <td>Estimated full corpus</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Positive</td>
                <td>271 (54.7)</td>
                <td>~63,800</td>
              </tr>
              <tr valign="top">
                <td>Negative</td>
                <td>59 (11.9)</td>
                <td>~13,900</td>
              </tr>
              <tr valign="top">
                <td>Neutral</td>
                <td>77 (15.6)</td>
                <td>~18,200</td>
              </tr>
              <tr valign="top">
                <td>Mixed</td>
                <td>88 (17.8)</td>
                <td>~20,800</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table10fn1">
              <p><sup>a</sup>Consensus: both GPT-4o and GPT-4.1 agreed on sentiment classification. Corpus estimates extrapolated from 64.4% health-related rate in the stratified sample (n=983). Positive-to-negative ratio=4.6:1.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p><xref ref-type="table" rid="table11">Table 11</xref> presents the breakdown of the consensus-negative aspects. Gastrointestinal issues (n=36) and cardiovascular concerns (n=22, primarily LDL cholesterol elevations) were the most frequent negative aspects, followed by pain and inflammation (n=14) and energy and mood disturbances (n=12), aligning with documented adaptation effects during carbohydrate restriction transitions.</p>
        <table-wrap position="float" id="table11">
          <label>Table 11</label>
          <caption>
            <p>Consensus-negative health aspects identified by dual-model aspect-based sentiment analysis (ABSA) of English-language YouTube comments on 11 metabolic health channels (N=43,111 unique comments, November 2013 to January 2026). Both GPT-4o and GPT-4.1 agreed on negative sentiment classification for each aspect listed.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="470"/>
            <col width="280"/>
            <col width="250"/>
            <thead>
              <tr valign="bottom">
                <td>Health aspect</td>
                <td>Comments reaching consensus<sup>a</sup>, n</td>
                <td>Clinical context</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Digestive</td>
                <td>36</td>
                <td>GI<sup>b</sup> adaptation</td>
              </tr>
              <tr valign="top">
                <td>Cardiovascular</td>
                <td>22</td>
                <td>LDL<sup>c</sup> concerns</td>
              </tr>
              <tr valign="top">
                <td>Pain and inflammation</td>
                <td>14</td>
                <td>Adaptation effects</td>
              </tr>
              <tr valign="top">
                <td>Energy and mood</td>
                <td>12</td>
                <td>Transition fatigue</td>
              </tr>
              <tr valign="top">
                <td>Neurological</td>
                <td>12</td>
                <td>Keto adaptation</td>
              </tr>
              <tr valign="top">
                <td>Blood sugar</td>
                <td>11</td>
                <td>Glycemic worsening</td>
              </tr>
              <tr valign="top">
                <td>Weight change</td>
                <td>11</td>
                <td>Weight stall or gain</td>
              </tr>
              <tr valign="top">
                <td>Other (sleep, diet adherence, mental health, medication, general well-being, cancer, skin, autoimmune, and hormonal)</td>
                <td>63</td>
                <td>Various</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table11fn1">
              <p><sup>a</sup>Consensus-negative: both GPT-4o and GPT-4.1 independently classified the aspect sentiment as negative. Total exceeds 59 comments because some comments contain multiple negative aspects.</p>
            </fn>
            <fn id="table11fn2">
              <p><sup>b</sup>GI: gastrointestinal.</p>
            </fn>
            <fn id="table11fn3">
              <p><sup>c</sup>LDL: low-density lipoprotein.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The 4.6:1 positive-to-negative ratio in this sample is consistent with the expected self-selection dynamics of the channels studied, while the presence of negative experiences at a meaningful rate suggests that the positive predominance of the primary framework is not solely an artefact of its positive-only scope. This finding is exploratory: generalization to the full corpus would require applying the same rigor used for the primary framework.</p>
        <p><xref ref-type="table" rid="table12">Table 12</xref> presents aspect-level ratios, revealing that the positive predominance is not uniformly distributed. Weight change (8.9:1) and general well-being (7.1:1) exhibit the strongest positive skew, while digestive health (0.8:1), neurological symptoms (0.5:1), and hormonal concerns (0.3:1) are negative-dominant, reflecting known adaptation effects. The cardiovascular domain shows near-parity (1.0:1), consistent with the contested nature of LDL cholesterol responses to high-fat diets.</p>
        <table-wrap position="float" id="table12">
          <label>Table 12</label>
          <caption>
            <p>Aspect-level sentiment distribution by health domain in English-language YouTube comments on 11 metabolic health channels (GPT-4.1 classification, n=627 health-related comments from a stratified sample of 1003 drawn from N=43,111 unique comments, November 2013 to January 2026). Positive-to-negative ratio computed for aspects with at least 5 negative mentions.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="110"/>
            <col width="120"/>
            <col width="100"/>
            <col width="90"/>
            <col width="150"/>
            <col width="200"/>
            <thead>
              <tr valign="bottom">
                <td>Health aspect<sup>a</sup></td>
                <td>Positive, n</td>
                <td>Negative, n</td>
                <td>Neutral, n</td>
                <td>Mixed, n</td>
                <td>Positive to negative ratio</td>
                <td>Negative, n/N (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Weight change</td>
                <td>241</td>
                <td>27</td>
                <td>27</td>
                <td>15</td>
                <td>8.9:1</td>
                <td>27/310 (8.7)</td>
              </tr>
              <tr valign="top">
                <td>General well-being</td>
                <td>213</td>
                <td>30</td>
                <td>13</td>
                <td>2</td>
                <td>7.1:1</td>
                <td>30/258 (11.6)</td>
              </tr>
              <tr valign="top">
                <td>Skin</td>
                <td>21</td>
                <td>5</td>
                <td>1</td>
                <td>0</td>
                <td>4.2:1</td>
                <td>5/27 (18.5)</td>
              </tr>
              <tr valign="top">
                <td>Medication</td>
                <td>30</td>
                <td>10</td>
                <td>34</td>
                <td>2</td>
                <td>3.0:1</td>
                <td>10/76 (13.2)</td>
              </tr>
              <tr valign="top">
                <td>Energy and mood</td>
                <td>52</td>
                <td>18</td>
                <td>1</td>
                <td>3</td>
                <td>2.9:1</td>
                <td>18/74 (24.3)</td>
              </tr>
              <tr valign="top">
                <td>Blood sugar</td>
                <td>51</td>
                <td>19</td>
                <td>45</td>
                <td>1</td>
                <td>2.7:1</td>
                <td>19/116 (16.4)</td>
              </tr>
              <tr valign="top">
                <td>Diet adherence</td>
                <td>96</td>
                <td>38</td>
                <td>60</td>
                <td>8</td>
                <td>2.5:1</td>
                <td>38/202 (18.8)</td>
              </tr>
              <tr valign="top">
                <td>Sleep</td>
                <td>22</td>
                <td>9</td>
                <td>1</td>
                <td>0</td>
                <td>2.4:1</td>
                <td>9/32 (28.1)</td>
              </tr>
              <tr valign="top">
                <td>Pain and inflammation</td>
                <td>54</td>
                <td>26</td>
                <td>5</td>
                <td>0</td>
                <td>2.1:1</td>
                <td>26/85 (30.6)</td>
              </tr>
              <tr valign="top">
                <td>Cardiovascular</td>
                <td>33</td>
                <td>32</td>
                <td>41</td>
                <td>6</td>
                <td>1.0:1</td>
                <td>32/112 (28.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>Digestive</italic>
                </td>
                <td>46</td>
                <td>56</td>
                <td>27</td>
                <td>2</td>
                <td>0.8:1</td>
                <td>56/131 (42.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>Mental health</italic>
                </td>
                <td>7</td>
                <td>11</td>
                <td>1</td>
                <td>0</td>
                <td>0.6:1</td>
                <td>11/19 (57.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>Neurological</italic>
                </td>
                <td>7</td>
                <td>15</td>
                <td>3</td>
                <td>0</td>
                <td>0.5:1</td>
                <td>15/25 (60.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>Hormonal</italic>
                </td>
                <td>2</td>
                <td>6</td>
                <td>8</td>
                <td>1</td>
                <td>0.3:1</td>
                <td>6/17 (35.3)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table12fn1">
              <p><sup>a</sup>Italicized aspects are negative-dominant (positive-to-negative ratio &#60;1.0). Negative, n/N (%) is the percentage of all mentions within that aspect classified as negative. GPT-4.1 single-model classification (n=627 health-related comments, n=1602 total aspect mentions).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>To contextualize the rule-based framework’s performance, BERT-base-uncased and RoBERTa-base classifiers were trained on the combined validation datasets (n=836; using a five-fold stratified cross-validation). Both transformer models achieved substantially higher recall (93.4% and 95.7%) but lower precision (87% and 88.2%) than the rule-based framework (97.6%), confirming the design advantage of precision optimization for high-confidence corpus generation. Full transformer baseline comparison results are presented in <xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>.</p>
        <p><xref ref-type="table" rid="table13">Table 13</xref> positions this work relative to prior approaches, with the key differentiation being explicit precision optimization for high-confidence corpus generation.</p>
        <table-wrap position="float" id="table13">
          <label>Table 13</label>
          <caption>
            <p>Comparison of the proposed rule-based natural language processing framework with prior social media health text classification approaches. The proposed framework was applied to English-language YouTube comments on 11 metabolic health channels (N=43,111 unique comments, November 2013 to January 2026). Precision and recall are reported for the positive class (health event).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="290"/>
            <col width="130"/>
            <col width="160"/>
            <col width="130"/>
            <col width="110"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Study</td>
                <td>Platform</td>
                <td>Approach</td>
                <td>Precision</td>
                <td>Recall</td>
                <td>Ontology</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Sarker and Gonzalez [<xref ref-type="bibr" rid="ref53">53</xref>]</td>
                <td>Twitter</td>
                <td>ML<sup>a</sup> (SVM)<sup>b</sup></td>
                <td>85%</td>
                <td>82%</td>
                <td>ADR-based</td>
              </tr>
              <tr valign="top">
                <td>Nikfarjam et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td>
                <td>Twitter</td>
                <td>CRF<sup>c</sup></td>
                <td>87%</td>
                <td>71%</td>
                <td>UMLS</td>
              </tr>
              <tr valign="top">
                <td>Golder et al [<xref ref-type="bibr" rid="ref43">43</xref>]</td>
                <td>Multi</td>
                <td>ML (Review)</td>
                <td>80-90%</td>
                <td>Varies</td>
                <td>Varies</td>
              </tr>
              <tr valign="top">
                <td>Magge et al [<xref ref-type="bibr" rid="ref54">54</xref>]</td>
                <td>Twitter</td>
                <td>DL<sup>d</sup> (RoBERTa)</td>
                <td>63%<sup>e</sup></td>
                <td>63%<sup>e</sup></td>
                <td>MedDRA</td>
              </tr>
              <tr valign="top">
                <td>This study</td>
                <td>YouTube</td>
                <td>Rule-based</td>
                <td>97.6%</td>
                <td>56.2%</td>
                <td>Custom</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table13fn1">
              <p><sup>a</sup>ML: machine learning.</p>
            </fn>
            <fn id="table13fn2">
              <p><sup>b</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table13fn3">
              <p><sup>c</sup>CRF: conditional random field.</p>
            </fn>
            <fn id="table13fn4">
              <p><sup>d</sup>DL: deep learning.</p>
            </fn>
            <fn id="table13fn5">
              <p><sup>e</sup>Evaluated at a realistic 7% positive rate. At 2% positive rate (comparable to this study’s 4.15%), precision dropped to 21% and recall to 25%.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study set out to determine whether self-reported positive health outcomes can be systematically extracted from YouTube comments on metabolic health healthcasting channels, and, if so, to characterize their prevalence, distribution across health aspects, variation across content creators, and the classification accuracy required to generate a validated corpus.</p>
        <sec>
          <title>RQ1: What Is the Prevalence of Self-Reported Positive Health Outcomes in YouTube Comments on Metabolic Health Content?</title>
          <p>The classification framework identified 1790 definite positive self-reported health outcome comments from a corpus of 43,111 unique comments across eleven metabolic health healthcasting channels, corresponding to a raw prevalence of 4.15%. These reports are unsolicited first-person accounts of health improvement, posted spontaneously under creator videos rather than in response to surveys or prompts, making them a distinctive source of real-world health data. The observed prevalence remained stable during external validation on an independent set of 5 held-out channels, suggesting that positive outcome reporting occurs at a consistent, detectable frequency across healthcasting communities in this domain. A supplementary sentiment analysis of the broader health discourse confirmed that positive outcome reporting constitutes a substantial but not overwhelming share of health-related conversation, with a positive-to-negative ratio of approximately 4.6:1.</p>
        </sec>
        <sec>
          <title>RQ2: What Types of Health Outcomes Are Most Frequently Reported, and How Are They Distributed Across Subjective, Objective, and Disease-Specific Categories?</title>
          <p>The outcome landscape that emerged from the corpus was considerably broader than the weight-loss framing commonly associated with carbohydrate restriction [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Positive outcomes were systematically organized using a newly developed 35-aspect hierarchical ontology aligned with 3 complementary ROs: subjective well-being (how people feel day to day), objectively measurable changes (such as blood glucose and body composition), and named disease-specific improvements. Over half of positive outcome comments described improvements across multiple health dimensions, suggesting that commenters experience and report broad, interconnected health changes rather than isolated improvements. While changes in weight and body composition dominated, the most frequently reported outcomes also included reductions in pain and inflammation, improvements in glycemic control, better skin health, enhanced psychological well-being, and improved regulation of appetite, digestive health, and energy levels. Reports also identified improvements or remission across eighteen medical conditions, including type 2 diabetes (often described as reversed or resolved), fatty liver disease, hypertension, and polycystic ovary syndrome. Most reports described quantified changes (such as specific weight loss or improved blood markers), though symptom cessation, explicit improvement language, and disease reversal or remission claims were also common. Notably, medication discontinuation reports were present in the corpus, indicating that some commenters stopped prescription medication in connection with dietary changes described in creator content. 
The supplementary sentiment analysis added nuance to these findings: while weight change and general well-being showed strong positive skew, the digestive, neurological, and hormonal domains showed more negative than positive reports, consistent with known adaptation effects during transitions to carbohydrate restriction [<xref ref-type="bibr" rid="ref17">17</xref>]. This finding confirms that negative health experiences are present in these communities at a meaningful rate, providing important context for interpreting the positive outcomes identified by the primary framework.</p>
        </sec>
        <sec>
          <title>RQ3: Does Positive Outcome Reporting Vary Significantly Across Content Creators, and What Factors May Explain This Variation?</title>
          <p>Positive outcome reporting was not uniformly distributed across the eleven channels, with rates ranging from 1.32% to 10.40%, a nearly eightfold difference that suggests a structural rather than a random pattern. Four channels clustered at substantially higher rates (KenDBerryMD, Jason Fung, Eric Berg DC, and Eric Westman), while the remaining 7 fell below 3%. Because comment volume was balanced across channels, this heterogeneity cannot be attributed to differences in sample size, and the pattern persisted in the external validation. The data suggest that this variation aligns with differences in creator discourse style: channels whose creators adopted accessible, user-facing communication tended to elicit longer average comments and higher testimonial rates, with outcome reports spanning a broad range of health aspects and frequently including narratives of symptom cessation and disease reversal. In contrast, channels with more scientifically oriented discourse produced fewer testimonials overall, but those that did appear were notably longer and more detailed, with higher rates of reports of medication discontinuation and outcomes spanning multiple health dimensions simultaneously. This influence extended to comment depth itself, as false-negative rates were significantly higher among medium-length and long comments than among short ones, suggesting that the more elaborate forms of self-disclosure encouraged by certain creator styles also produce more complex expressions of health outcomes that are harder for rule-based systems to capture. Together, these findings indicate that channel-level characteristics, including content style, creator approach, and community norms, meaningfully shape not only the volume but also the depth and nature of testimonial discourse within healthcasting environments.</p>
        </sec>
        <sec>
          <title>RQ4: Can a Precision-Optimized Rule-Based Framework Achieve Sufficient Classification Accuracy for Generating Validated Health Outcome Corpora From User-Generated Content?</title>
          <p>The 3-stage rule-based classifier achieved 97.6% precision in the development corpus, and this level held up under external validation across 5 held-out channels, with overlapping confidence intervals, confirming that the framework generalizes beyond the data on which it was built. Estimated recall was 56.2%, reflecting the deliberate design choice to prioritize the validity of every included case over exhaustive detection: the system captures roughly half of all positive outcome reports, but nearly every one it identifies is correct. This trade-off yielded a validated corpus of health outcome reports at scale without manual review, demonstrating that the framework can generate research-ready datasets from large comment corpora. To assess whether the classification task itself is well defined rather than dependent on a single coder’s judgment, 2 independent AI systems (GPT-4o and GPT-4.1) were given the same coding instructions and reached substantial agreement on core outcome identification, with the strongest reliability emerging when the 2 models were compared directly to each other rather than to the human coder. A head-to-head comparison with fine-tuned deep learning models (BERT and RoBERTa) confirmed that the precision advantage is built into the rule-based design: both models achieved higher recall but lower precision, meaning that for every gain in detection coverage, a substantial number of incorrect classifications would be introduced into the corpus. Error analysis revealed that most missed cases occurred not because the framework lacked relevant health keywords, but because commenters expressed their outcomes in sentence structures the rules did not anticipate. This suggests that expanding the range of recognized expression patterns, rather than adding new terminology, is the most direct route to improved recall. 
Together, these results establish the framework as a reliable, fully transparent, and reproducible methodology for extracting self-reported health outcomes from unstructured user-generated content at scale, with every classification decision traceable to specific rules.</p>
        </sec>
      </sec>
      <sec>
        <title>Implications for Research and Practice</title>
        <p>Computational infodemiology has increasingly established social media as a valuable source of real-world health data, yet systematic extraction methodologies have focused almost exclusively on pharmacovigilance and adverse-event detection from microblogging platforms [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>], whereas qualitative research on online health communities has examined forum-based and support-group settings [<xref ref-type="bibr" rid="ref57">57</xref>]. A substantial and growing body of health-related discourse exists in a setting that has received comparatively little methodological attention: the comment sections of expert-led YouTube health channels, where tens of thousands of individuals respond to long-form, creator-led content with first-person accounts of health changes [<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref59">59</xref>]. Our results demonstrate that this content layer carries health signal at a density sufficient for systematic computational extraction, establishing healthcasting as a distinct empirical setting for health informatics research. For infodemiology and digital health researchers, this offers a complementary observational channel that captures a population segment (individuals who self-direct dietary interventions informed by credentialed online content) largely invisible to clinical registries and pharmacovigilance systems [<xref ref-type="bibr" rid="ref29">29</xref>]. The replicable 3-phase construction workflow, hierarchical ontology, and multistudy validation flow presented in this study provide a methodological template that research groups can adapt to adjacent domains, including cardiometabolic disease, chronic pain, and mental health [<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref61">61</xref>].</p>
        <p>For clinical researchers and public health practitioners, understanding the health changes patients experience and report outside clinical settings is critical for designing patient-reported outcome measures, identifying underrecognized treatment effects, and monitoring population-level engagement with dietary interventions [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>]. Our analysis reveals a self-reported outcome landscape that extends beyond the weight-loss framing commonly associated with carbohydrate restriction [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Reductions in pain and inflammation, improvements in type 2 diabetes, skin health, and psychological well-being were prominently reported, consistent with the clinical trial literature documenting multi-system effects of metabolic interventions [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>]. Reports covered eighteen named disease conditions, and over half addressed multiple ROs simultaneously, suggesting that commenters experience and report systemic rather than isolated health changes. These convergences between self-reported online data and published clinical evidence identify patient-reported outcome dimensions that merit prospective investigation using designs appropriate to each dimension [<xref ref-type="bibr" rid="ref62">62</xref>]. Perhaps most important for clinical practice, medication-discontinuation reports were present in this corpus, raising questions about patients stopping prescription therapy influenced by creator-led online content without direct medical oversight. 
This finding underscores the need for structured dialogue between healthcasting communities and the clinical care system to ensure that patient-initiated medication changes occur safely [<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref67">67</xref>].</p>
        <p>As digital platforms become the primary channels through which patients encounter health information and make health decisions, understanding how platform features and community dynamics shape health discourse is an increasingly important priority for health communication researchers and platform designers [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. Our analysis reveals that positive outcome reporting rates vary substantially across channels operating within the same broad dietary domain, and that this variation is structurally predictable from content style and community culture rather than random. We offer interpretive hypotheses, framed as requiring systematic testing rather than as validated findings [<xref ref-type="bibr" rid="ref57">57</xref>]: channels whose creators explicitly invite health testimonials and attract audiences with active metabolic conditions foster communities where outcome-sharing functions as a social norm, whereas science-focused channels attract audiences that engage with scientific explanations rather than personal testimony. Prior research has established that content style and audience composition shape user behavior on social platforms [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref68">68</xref>], and our results extend this principle to the specific domain of creator-led metabolic health content. This pattern is further supported by findings that channels with longer average comments had significantly higher positive-outcome rates (ρ=0.645; <italic>P</italic>=.03) and that false-negative rates increased substantially with comment length. These results indicate that the richer, more detailed forms of self-disclosure fostered by certain creator styles also generate more complex health narratives that automated extraction systems are less likely to capture fully. 
For researchers designing computational health discourse tools, this suggests that extraction frameworks must be calibrated not only to the health domain but also to the discourse norms of the specific communities being studied. For platform designers, these patterns suggest actionable interventions: features that surface testimonial density, link outcome claims to their evidentiary basis, or guide viewers toward content that matches their information needs could meaningfully reshape how patients encounter health testimony online [<xref ref-type="bibr" rid="ref20">20</xref>]. The framework and ontology presented here provide the measurement infrastructure needed to evaluate such interventions at scale.</p>
        <p>In the health natural language processing literature, classification systems have been optimized primarily for balanced performance, yielding precision levels insufficient for high-confidence corpus generation, in which every included observation must be valid [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. Our precision-first rule-based architecture addresses this gap, exceeding the precision reported in comparable systems (<xref ref-type="table" rid="table13">Table 13</xref>) [<xref ref-type="bibr" rid="ref43">43</xref>]. External validation confirms that this advantage extends beyond the development corpus [<xref ref-type="bibr" rid="ref69">69</xref>]. The transformer baseline comparison, in which fine-tuned models achieved higher recall but lower precision, confirms that this advantage is architectural rather than data-dependent [<xref ref-type="bibr" rid="ref42">42</xref>]. For the broader health natural language processing community, this finding supports a practical design principle: when the RO is corpus generation rather than individual case detection, rule-based architectures with domain-specific ontologies offer a precision advantage that current deep learning approaches do not match [<xref ref-type="bibr" rid="ref43">43</xref>]. The construction and validation workflows are domain-agnostic and transferable to other chronic-disease communities and health-adjacent digital platforms, subject to ontology respecification and revalidation [<xref ref-type="bibr" rid="ref60">60</xref>].</p>
      </sec>
      <sec>
        <title>Limitations and Future Directions</title>
        <p>This study has limitations that define its scope and point to productive extensions. Commenters are a self-selected subset of viewers, and those experiencing positive outcomes may be more likely to comment, inflating the observed rate relative to the underlying population [<xref ref-type="bibr" rid="ref29">29</xref>]. The sample selection strategy prioritized the 10 most-commented videos per channel, further biasing the sample toward viral content [<xref ref-type="bibr" rid="ref68">68</xref>], and reply threads were excluded due to API constraints [<xref ref-type="bibr" rid="ref70">70</xref>]. Manual validation was performed by a single domain-expert coder, with interrater reliability assessed through LLM-assisted annotation rather than independent human domain-expert validation [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref71">71</xref>]. The observed association between creator discourse style and testimonial characteristics is correlational and based on 11 channels, limiting causal inference and the generalizability of channel-level patterns. Additionally, the significantly higher false-negative rate among longer comments means that the framework may systematically under-capture outcomes in communities whose discourse norms encourage more detailed self-reporting, compounding the channel-level variation described above. Addressing these constraints through multicoder validation at scale and incorporating reply threads is a priority for future research.</p>
        <p>The framework was developed and validated within a single dietary domain on a single platform. The ontology was engineered for TCR, and adapting it to other health domains requires modifying the ontology and revalidating it rather than direct transfer [<xref ref-type="bibr" rid="ref60">60</xref>]. Results may not generalize to platforms with different demographics, moderation norms, or content formats [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref72">72</xref>]. While external validation on held-out channels confirmed precision transfer, 3 of 5 external channels yielded fewer than 15 classifier-positive comments, precluding reliable per-channel estimates. Extending the framework to adjacent chronic-disease communities on YouTube and to other health-adjacent platforms represents a natural next step [<xref ref-type="bibr" rid="ref61">61</xref>].</p>
        <p>The nature of self-reported content imposes additional constraints. Outcomes cannot be independently verified, and users may misattribute improvements or conflate correlation with causation; the classification only confirms that users report these outcomes, not that the underlying health claims are accurate [<xref ref-type="bibr" rid="ref73">73</xref>]. The framework extracts only positive outcomes, an asymmetry only partially mitigated by the supplementary ABSA analysis. Comments are point-in-time reports that preclude assessment of long-term sustainability [<xref ref-type="bibr" rid="ref26">26</xref>]. Developing a dedicated negative-outcome extraction framework and expanding pattern libraries to close the recall gap are the most immediate methodological extensions, enabling a more complete characterization of health discourse in creator-led digital communities.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study establishes that creator-led metabolic-health YouTube content is a scalable, computationally viable source of self-reported health outcomes and presents a replicable, precision-first methodology for extracting and validating these outcomes at the corpus scale. Beyond the methodology, the breadth of reported outcomes, the influence of creator discourse style on the volume and nature of testimonial reporting, and the medication-discontinuation signal collectively position healthcasting as a phenomenon warranting sustained attention from the health informatics, health communication, and clinical research communities. As this attention grows, the precision-first architecture and hierarchical ontology provide a transferable methodological foundation for computational health discourse analysis across chronic-disease domains and digital platforms.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>CREMLS reporting checklist.</p>
        <media xlink:href="jmir_v28i1e94855_app1.docx" xlink:title="DOCX File , 26 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Summary: videos and comments by channel.</p>
        <media xlink:href="jmir_v28i1e94855_app2.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 25 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Ontology structure with representative keyword and exclusion patterns.</p>
        <media xlink:href="jmir_v28i1e94855_app3.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Classification rule patterns.</p>
        <media xlink:href="jmir_v28i1e94855_app4.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>External validation protocol and results.</p>
        <media xlink:href="jmir_v28i1e94855_app5.docx" xlink:title="DOCX File , 19 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Large language model–assisted annotation prompt.</p>
        <media xlink:href="jmir_v28i1e94855_app6.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Transformer baseline comparison.</p>
        <media xlink:href="jmir_v28i1e94855_app7.docx" xlink:title="DOCX File , 198 KB"/>
      </supplementary-material>
      <supplementary-material id="app8">
        <label>Multimedia Appendix 8</label>
        <p>Aspect-based sentiment analysis prompt design and few-shot examples.</p>
        <media xlink:href="jmir_v28i1e94855_app8.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app9">
        <label>Multimedia Appendix 9</label>
        <p>Large language model annotation bias audit.</p>
        <media xlink:href="jmir_v28i1e94855_app9.docx" xlink:title="DOCX File , 22 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ABSA</term>
          <def>
            <p>aspect-based sentiment analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CREMLS</term>
          <def>
            <p>Consolidated Reporting Guidelines for Prognostic and Diagnostic Machine Learning Modeling Studies</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RO</term>
          <def>
            <p>research objective</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RoBERTa</term>
          <def>
            <p>Robustly Optimized BERT Pretraining Approach</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RQ</term>
          <def>
            <p>research question</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">TCR</term>
          <def>
            <p>therapeutic carbohydrate restriction</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Large language models (GPT-4o and GPT-4.1, OpenAI) were used as independent coders to assess interrater reliability for the manual validation sample, as described in the Interrater Reliability Assessment section. Pre-trained transformer models (BERT-base-uncased and RoBERTa-base, Hugging Face) were fine-tuned on the study dataset for the baseline comparison analysis described in the Transformer Baseline Comparison section. AI-assisted tools (Claude, Anthropic) were used for technical manuscript preparation tasks, including citation format conversion, document formatting compliance checks, and reference list verification. No AI tools were used to interpret results or draft intellectual content. All outputs were reviewed and verified by the authors, who take full responsibility for the content of this manuscript.</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>The datasets generated or analyzed during this study are available in the GitHub repository. Additional data are included in this published article and its supplementary information files.</p>
    </notes>
    <notes>
      <title>Funding</title>
      <p>This work was supported by Fundação para a Ciência e a Tecnologia IP (project UID/00667 Unidade de Investigação e Desenvolvimento em Engenharia Mecânica e Industrial). The funder had no involvement in study design, data collection, analysis, interpretation of data, or the writing of this manuscript.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>RR conceptualized the study, designed the methodology, developed the ontology and classification framework, collected and analyzed the data, performed the validation, and wrote the manuscript. AZ supervised the research, provided critical review, and contributed to the interpretation of findings.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Setrerrahmane</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Trends in insulin resistance: insights into mechanisms and therapeutic strategy</article-title>
          <source>Signal Transduct Target Ther</source>
          <year>2022</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>216</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41392-022-01073-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41392-022-01073-0</pub-id>
          <pub-id pub-id-type="medline">35794109</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41392-022-01073-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC9259665</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Volek</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Phinney</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Forsythe</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Quann</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Puglisi</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kraemer</surname>
              <given-names>William J</given-names>
            </name>
            <name name-style="western">
              <surname>Bibus</surname>
              <given-names>Doug M</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez</surname>
              <given-names>Maria Luz</given-names>
            </name>
            <name name-style="western">
              <surname>Feinman</surname>
              <given-names>Richard D</given-names>
            </name>
          </person-group>
          <article-title>Carbohydrate restriction has a more favorable impact on the metabolic syndrome than a low fat diet</article-title>
          <source>Lipids</source>
          <year>2009</year>
          <month>04</month>
          <volume>44</volume>
          <issue>4</issue>
          <fpage>297</fpage>
          <lpage>309</lpage>
          <pub-id pub-id-type="doi">10.1007/s11745-008-3274-2</pub-id>
          <pub-id pub-id-type="medline">19082851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noakes</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wellington</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>Ketogenic: The Science of Therapeutic Carbohydrate Restriction in Human Health</source>
          <year>2023</year>
          <month>06</month>
          <day>22</day>
          <publisher-loc>San Diego, CA</publisher-loc>
          <publisher-name>Academic Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Volek</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Phinney</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <source>The Art and Science of Low Carbohydrate Living: An Expert Guide to Making the Life-Saving Benefits of Carbohydrate Restriction Sustainable and Enjoyable</source>
          <year>2011</year>
          <month>05</month>
          <day>19</day>
          <publisher-loc>Frederick</publisher-loc>
          <publisher-name>Beyond Obesity</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Westman</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Feinman</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Mavropoulos</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Vernon</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Volek</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Wortman</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Yancy</surname>
              <given-names>William S</given-names>
            </name>
            <name name-style="western">
              <surname>Phinney</surname>
              <given-names>Stephen D</given-names>
            </name>
          </person-group>
          <article-title>Low-carbohydrate nutrition and metabolism</article-title>
          <source>Am J Clin Nutr</source>
          <year>2007</year>
          <month>08</month>
          <volume>86</volume>
          <issue>2</issue>
          <fpage>276</fpage>
          <lpage>84</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0002-9165(23)13296-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ajcn/86.2.276</pub-id>
          <pub-id pub-id-type="medline">17684196</pub-id>
          <pub-id pub-id-type="pii">S0002-9165(23)13296-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ludwig</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Apovian</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Aronne</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Astrup</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cantley</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Ebbeling</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Heymsfield</surname>
              <given-names>Steven B</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>James D</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>Janet C</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>Ronald M</given-names>
            </name>
            <name name-style="western">
              <surname>Taubes</surname>
              <given-names>Gary</given-names>
            </name>
            <name name-style="western">
              <surname>Volek</surname>
              <given-names>Jeff S</given-names>
            </name>
            <name name-style="western">
              <surname>Westman</surname>
              <given-names>Eric C</given-names>
            </name>
            <name name-style="western">
              <surname>Willett</surname>
              <given-names>Walter C</given-names>
            </name>
            <name name-style="western">
              <surname>Yancy</surname>
              <given-names>William S</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>Mark I</given-names>
            </name>
          </person-group>
          <article-title>Competing paradigms of obesity pathogenesis: energy balance versus carbohydrate-insulin models</article-title>
          <source>Eur J Clin Nutr</source>
          <year>2022</year>
          <month>09</month>
          <volume>76</volume>
          <issue>9</issue>
          <fpage>1209</fpage>
          <lpage>1221</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35896818"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41430-022-01179-2</pub-id>
          <pub-id pub-id-type="medline">35896818</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41430-022-01179-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC9436778</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Westman</surname>
              <given-names>EC</given-names>
            </name>
          </person-group>
          <article-title>Editorial: Carbohydrate-restricted nutrition and diabetes mellitus</article-title>
          <source>Front Nutr</source>
          <year>2021</year>
          <volume>8</volume>
          <fpage>827990</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35127799"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fnut.2021.827990</pub-id>
          <pub-id pub-id-type="medline">35127799</pub-id>
          <pub-id pub-id-type="pmcid">PMC8813844</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Westman</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Tondt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Maguire</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yancy</surname>
              <given-names>WS</given-names>
            </name>
          </person-group>
          <article-title>Implementing a low-carbohydrate, ketogenic diet to manage type 2 diabetes mellitus</article-title>
          <source>Expert Rev Endocrinol Metab</source>
          <year>2018</year>
          <month>09</month>
          <volume>13</volume>
          <issue>5</issue>
          <fpage>263</fpage>
          <lpage>272</lpage>
          <pub-id pub-id-type="doi">10.1080/17446651.2018.1523713</pub-id>
          <pub-id pub-id-type="medline">30289048</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Lanjian</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Zhouchen</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Die</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>Jinxia</given-names>
            </name>
          </person-group>
          <article-title>Are low carbohydrate diet interventions beneficial for metabolic syndrome and its components? A systematic review and meta-analysis of randomized controlled trials</article-title>
          <source>Int J Obes (Lond)</source>
          <year>2025</year>
          <month>07</month>
          <volume>49</volume>
          <issue>7</issue>
          <fpage>1252</fpage>
          <lpage>1263</lpage>
          <pub-id pub-id-type="doi">10.1038/s41366-025-01822-5</pub-id>
          <pub-id pub-id-type="medline">40579564</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41366-025-01822-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chamma</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chamma</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mattar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Slaybe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Haidar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rizk</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Beyond epilepsy management: a narrative review of the health effects of ketogenic diets</article-title>
          <source>Nutrition</source>
          <year>2025</year>
          <month>09</month>
          <volume>137</volume>
          <fpage>112804</fpage>
          <pub-id pub-id-type="doi">10.1016/j.nut.2025.112804</pub-id>
          <pub-id pub-id-type="medline">40403346</pub-id>
          <pub-id pub-id-type="pii">S0899-9007(25)00122-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahire</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yadav</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bhamare</surname>
              <given-names>UU</given-names>
            </name>
            <name name-style="western">
              <surname>Kaur</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Palkar</surname>
              <given-names>MB</given-names>
            </name>
          </person-group>
          <article-title>From refractory epilepsy to neurodegeneration: emerging mechanistic and clinical insights into the ketogenic diet</article-title>
          <source>FASEB J</source>
          <year>2026</year>
          <month>03</month>
          <day>31</day>
          <volume>40</volume>
          <issue>6</issue>
          <fpage>e71609</fpage>
          <pub-id pub-id-type="doi">10.1096/fj.202503317R</pub-id>
          <pub-id pub-id-type="medline">41846418</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ede</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>Change Your Diet, Change Your Mind: A Powerful Plan to Improve Mood, Overcome Anxiety, and Protect Memory for a Lifetime of Optimal Mental Health</source>
          <year>2024</year>
          <month>01</month>
          <day>30</day>
          <publisher-loc>New York City</publisher-loc>
          <publisher-name>Balance</publisher-name>
          <fpage>384</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noakes</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Windt</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Evidence that supports the prescription of low-carbohydrate high-fat diets: a narrative review</article-title>
          <source>Br J Sports Med</source>
          <year>2017</year>
          <month>01</month>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>133</fpage>
          <lpage>139</lpage>
          <pub-id pub-id-type="doi">10.1136/bjsports-2016-096491</pub-id>
          <pub-id pub-id-type="medline">28053201</pub-id>
          <pub-id pub-id-type="pii">51/2/133</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sethi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wakeham</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ketter</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hooshmand</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bjornstad</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Westman</surname>
              <given-names>Eric</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>Ronald M</given-names>
            </name>
            <name name-style="western">
              <surname>Saslow</surname>
              <given-names>Laura</given-names>
            </name>
          </person-group>
          <article-title>Ketogenic Diet Intervention on Metabolic and Psychiatric Health in Bipolar and Schizophrenia: A Pilot Trial</article-title>
          <source>Psychiatry Res</source>
          <year>2024</year>
          <month>05</month>
          <volume>335</volume>
          <fpage>115866</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0165-1781(24)00151-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.psychres.2024.115866</pub-id>
          <pub-id pub-id-type="medline">38547601</pub-id>
          <pub-id pub-id-type="pii">S0165-1781(24)00151-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Danan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Westman</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Saslow</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Ede</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The ketogenic diet for refractory mental illness: a retrospective analysis of 31 inpatients</article-title>
          <source>Front Psychiatry</source>
          <year>2022</year>
          <volume>13</volume>
          <fpage>951376</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35873236"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyt.2022.951376</pub-id>
          <pub-id pub-id-type="medline">35873236</pub-id>
          <pub-id pub-id-type="pmcid">PMC9299263</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarnyai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Kraeuter</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Palmer</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Ketogenic diet for schizophrenia: clinical implication</article-title>
          <source>Curr Opin Psychiatry</source>
          <year>2019</year>
          <month>09</month>
          <volume>32</volume>
          <issue>5</issue>
          <fpage>394</fpage>
          <lpage>401</lpage>
          <pub-id pub-id-type="doi">10.1097/YCO.0000000000000535</pub-id>
          <pub-id pub-id-type="medline">31192814</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dyńka</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rodzeń</surname>
              <given-names>Ł</given-names>
            </name>
            <name name-style="western">
              <surname>Rodzeń</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Łojko</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Karakuła-Juchnowicz</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ede</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Grzywacz</surname>
              <given-names>Żaneta</given-names>
            </name>
            <name name-style="western">
              <surname>Antosik</surname>
              <given-names>Katarzyna</given-names>
            </name>
            <name name-style="western">
              <surname>Sethi</surname>
              <given-names>Shebani</given-names>
            </name>
            <name name-style="western">
              <surname>Unwin</surname>
              <given-names>David</given-names>
            </name>
          </person-group>
          <article-title>The ketogenic diet is not for everyone: contraindications, side effects, and drug interactions</article-title>
          <source>Ann Med</source>
          <year>2026</year>
          <month>12</month>
          <volume>58</volume>
          <issue>1</issue>
          <fpage>2603016</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/10.1080/07853890.2025.2603016?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/07853890.2025.2603016</pub-id>
          <pub-id pub-id-type="medline">41486865</pub-id>
          <pub-id pub-id-type="pmcid">PMC12777878</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kamiński</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Skonieczna-Żydecka</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nowak</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Stachowska</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Global and local diet popularity rankings, their secular trends, and seasonal variation in Google Trends data</article-title>
          <source>Nutrition</source>
          <year>2020</year>
          <volume>79-80</volume>
          <fpage>110759</fpage>
          <pub-id pub-id-type="doi">10.1016/j.nut.2020.110759</pub-id>
          <pub-id pub-id-type="medline">32563767</pub-id>
          <pub-id pub-id-type="pii">S0899-9007(20)30042-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lennerz</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Mey</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Henn</surname>
              <given-names>OH</given-names>
            </name>
            <name name-style="western">
              <surname>Ludwig</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>Behavioral characteristics and self-reported health status among 2029 adults consuming a "Carnivore Diet"</article-title>
          <source>Curr Dev Nutr</source>
          <year>2021</year>
          <month>12</month>
          <volume>5</volume>
          <issue>12</issue>
          <fpage>nzab133</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2475-2991(22)10608-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/cdn/nzab133</pub-id>
          <pub-id pub-id-type="medline">34934897</pub-id>
          <pub-id pub-id-type="pii">S2475-2991(22)10608-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8684475</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaňková</surname>
              <given-names>Jaroslava</given-names>
            </name>
            <name name-style="western">
              <surname>Binder</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Matthes</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Health-related communication of social media influencers: a scoping review</article-title>
          <source>Health Commun</source>
          <year>2025</year>
          <month>06</month>
          <volume>40</volume>
          <issue>7</issue>
          <fpage>1300</fpage>
          <lpage>1313</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/10.1080/10410236.2024.2397268?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/10410236.2024.2397268</pub-id>
          <pub-id pub-id-type="medline">39258728</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Madathil</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Rivera-Rodriguez</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Greenstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Gramopadhye</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Healthcare information on YouTube: a systematic review</article-title>
          <source>Health Informatics J</source>
          <year>2015</year>
          <month>09</month>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>173</fpage>
          <lpage>194</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1460458213512220?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1460458213512220</pub-id>
          <pub-id pub-id-type="medline">24670899</pub-id>
          <pub-id pub-id-type="pii">1460458213512220</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>WYS</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>WMP</given-names>
            </name>
          </person-group>
          <article-title>Addressing health-related misinformation on social media</article-title>
          <source>JAMA</source>
          <year>2018</year>
          <month>12</month>
          <day>18</day>
          <volume>320</volume>
          <issue>23</issue>
          <fpage>2417</fpage>
          <lpage>2418</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2018.16865</pub-id>
          <pub-id pub-id-type="medline">30428002</pub-id>
          <pub-id pub-id-type="pii">2715795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schillinger</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chittamuru</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ramírez</surname>
              <given-names>A Susana</given-names>
            </name>
          </person-group>
          <article-title>From "Infodemics" to health promotion: a novel framework for the role of social media in public health</article-title>
          <source>Am J Public Health</source>
          <year>2020</year>
          <month>09</month>
          <volume>110</volume>
          <issue>9</issue>
          <fpage>1393</fpage>
          <lpage>1396</lpage>
          <pub-id pub-id-type="doi">10.2105/AJPH.2020.305746</pub-id>
          <pub-id pub-id-type="medline">32552021</pub-id>
          <pub-id pub-id-type="pmcid">PMC7427212</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Domingo-Salvany</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The science of real-time data capture: self-reports in health research</article-title>
          <source>J Epidemiol Community Health</source>
          <year>2008</year>
          <month>05</month>
          <day>01</day>
          <volume>62</volume>
          <issue>5</issue>
          <fpage>471.1</fpage>
          <lpage>471</lpage>
          <pub-id pub-id-type="doi">10.1136/jech.2007.068551</pub-id>
          <pub-id pub-id-type="medline">18413463</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elwert</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Winship</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Endogenous selection bias: the problem of conditioning on a collider variable</article-title>
          <source>Annu Rev Sociol</source>
          <year>2014</year>
          <month>07</month>
          <volume>40</volume>
          <fpage>31</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30111904"/>
          </comment>
          <pub-id pub-id-type="doi">10.1146/annurev-soc-071913-043455</pub-id>
          <pub-id pub-id-type="medline">30111904</pub-id>
          <pub-id pub-id-type="pmcid">PMC6089543</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Althubaiti</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Information bias in health research: definition, pitfalls, and adjustment methods</article-title>
          <source>J Multidiscip Healthc</source>
          <year>2016</year>
          <volume>9</volume>
          <fpage>211</fpage>
          <lpage>217</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/10.2147/JMDH.S104807?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.2147/JMDH.S104807</pub-id>
          <pub-id pub-id-type="medline">27217764</pub-id>
          <pub-id pub-id-type="pii">jmdh-9-211</pub-id>
          <pub-id pub-id-type="pmcid">PMC4862344</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinnenberg</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Buttenheim</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Padrez</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mancheno</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Twitter as a tool for health research: a systematic review</article-title>
          <source>Am J Public Health</source>
          <year>2017</year>
          <month>01</month>
          <volume>107</volume>
          <issue>1</issue>
          <fpage>e1</fpage>
          <lpage>e8</lpage>
          <pub-id pub-id-type="doi">10.2105/AJPH.2016.303512</pub-id>
          <pub-id pub-id-type="medline">27854532</pub-id>
          <pub-id pub-id-type="pmcid">PMC5308155</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charles-Smith</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Reynolds</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>EHY</given-names>
            </name>
            <name name-style="western">
              <surname>Olsen</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlin</surname>
              <given-names>Julie A</given-names>
            </name>
            <name name-style="western">
              <surname>Shigematsu</surname>
              <given-names>Mika</given-names>
            </name>
            <name name-style="western">
              <surname>Streichert</surname>
              <given-names>Laura C</given-names>
            </name>
            <name name-style="western">
              <surname>Suda</surname>
              <given-names>Katie J</given-names>
            </name>
            <name name-style="western">
              <surname>Corley</surname>
              <given-names>Courtney D</given-names>
            </name>
          </person-group>
          <article-title>Using Social Media for Actionable Disease Surveillance and Outbreak Management: A Systematic Literature Review</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>10</issue>
          <fpage>e0139701</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0139701"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0139701</pub-id>
          <pub-id pub-id-type="medline">26437454</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-21599</pub-id>
          <pub-id pub-id-type="pmcid">PMC4593536</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moorhead</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Hazlett</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Irwin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hoving</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A new dimension of health care: systematic review of the uses, benefits, and limitations of social media for health communication</article-title>
          <source>J Med Internet Res</source>
          <year>2013</year>
          <month>04</month>
          <day>23</day>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>e85</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2013/4/e85/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1933</pub-id>
          <pub-id pub-id-type="medline">23615206</pub-id>
          <pub-id pub-id-type="pii">v15i4e85</pub-id>
          <pub-id pub-id-type="pmcid">PMC3636326</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ginn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pharmacovigilance from social media: mining adverse drug reaction mentions using sequence labeling with word embedding cluster features</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>05</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>671</fpage>
          <lpage>681</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25755127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocu041</pub-id>
          <pub-id pub-id-type="medline">25755127</pub-id>
          <pub-id pub-id-type="pii">ocu041</pub-id>
          <pub-id pub-id-type="pmcid">PMC4457113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>You are what you tweet: analyzing Twitter for public health</article-title>
          <source>ICWSM</source>
          <year>2021</year>
          <month>08</month>
          <day>03</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>265</fpage>
          <lpage>272</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1609/icwsm.v5i1.14137"/>
          </comment>
          <pub-id pub-id-type="doi">10.1609/icwsm.v5i1.14137</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sloane</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Osanlou</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bollegala</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Maskell</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pirmohamed</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Social media and pharmacovigilance: a review of the opportunities and challenges</article-title>
          <source>Br J Clin Pharmacol</source>
          <year>2015</year>
          <month>10</month>
          <volume>80</volume>
          <issue>4</issue>
          <fpage>910</fpage>
          <lpage>920</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26147850"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/bcp.12717</pub-id>
          <pub-id pub-id-type="medline">26147850</pub-id>
          <pub-id pub-id-type="pmcid">PMC4594734</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The value of social media analysis for adverse events detection and pharmacovigilance: scoping review</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2024</year>
          <month>09</month>
          <day>06</day>
          <volume>10</volume>
          <fpage>e59167</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2024/1/e59167/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/59167</pub-id>
          <pub-id pub-id-type="medline">39240684</pub-id>
          <pub-id pub-id-type="pii">v10i1e59167</pub-id>
          <pub-id pub-id-type="pmcid">PMC11415724</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Proceedings of the 9th social media mining for health research and applications (SMM4H 2024) workshop and shared tasks</article-title>
          <year>2024</year>
          <month>08</month>
          <day>15</day>
          <conf-name>Proceedings of the 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop</conf-name>
          <conf-date>August 15, 2024</conf-date>
          <conf-loc>Bangkok, Thailand</conf-loc>
          <publisher-loc>Bangkok</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>A</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2024.smm4h-1.0/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kiciman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Discovering shifts to suicidal ideation from mental health content in social media</article-title>
          <source>Proc SIGCHI Conf Hum Factor Comput Syst</source>
          <year>2016</year>
          <month>05</month>
          <conf-name>CHI '16: Proceedings of the 2016 CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>May 7-12, 2016</conf-date>
          <conf-loc>San Jose, California, USA</conf-loc>
          <fpage>2098</fpage>
          <lpage>2110</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29082385"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2858036.2858207</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Keelan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pavri-Garcia</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Tomlinson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>YouTube as a source of information on immunization: a content analysis</article-title>
          <source>JAMA</source>
          <year>2007</year>
          <month>12</month>
          <day>05</day>
          <volume>298</volume>
          <issue>21</issue>
          <fpage>2482</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.298.21.2482</pub-id>
          <pub-id pub-id-type="medline">18056901</pub-id>
          <pub-id pub-id-type="pii">298/21/2482</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mohamed</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Shoufan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Users' experience with health-related content on YouTube: an exploratory study</article-title>
          <source>BMC Public Health</source>
          <year>2024</year>
          <month>01</month>
          <day>03</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>86</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-023-17585-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-023-17585-5</pub-id>
          <pub-id pub-id-type="medline">38172765</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-023-17585-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC10765842</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Teng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khong</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Pahlevan Sharif</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>YouTube video comments on healthy eating: descriptive and predictive analysis</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>10</month>
          <day>01</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e19618</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/4/e19618/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19618</pub-id>
          <pub-id pub-id-type="medline">33001036</pub-id>
          <pub-id pub-id-type="pii">v6i4e19618</pub-id>
          <pub-id pub-id-type="pmcid">PMC7563625</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Machine learning approaches for depression detection on social media: a systematic review of biases and methodological challenges</article-title>
          <source>J Behav Data</source>
          <year>2025</year>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>67</fpage>
          <lpage>102</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jbds.isdsa.org/jbds/article/view/110"/>
          </comment>
          <pub-id pub-id-type="doi">10.35566/jbds/caoyc</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Social Monitoring for Public Health</article-title>
          <source>Synthesis Lectures on Information Concepts, Retrieval, and Services</source>
          <year>2017</year>
          <month>08</month>
          <day>31</day>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>1</fpage>
          <lpage>183</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Leary</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Crutchley</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fine</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of social media as screening for suicide risk</article-title>
          <source>Biomed Inform Insights</source>
          <year>2018</year>
          <volume>10</volume>
          <fpage>1178222618792860</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1178222618792860?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1178222618792860</pub-id>
          <pub-id pub-id-type="medline">30158822</pub-id>
          <pub-id pub-id-type="pii">10.1177_1178222618792860</pub-id>
          <pub-id pub-id-type="pmcid">PMC6111391</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chiticariu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Reiss</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Rule-based information extraction is dead! Long live rule-based information extraction systems</article-title>
          <year>2013</year>
          <month>10</month>
          <conf-name>Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>October 18-21, 2013</conf-date>
          <conf-loc>Seattle, Washington, USA</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>827</fpage>
          <lpage>832</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D13-1079/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d13-1079</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Loke</surname>
              <given-names>YK</given-names>
            </name>
          </person-group>
          <article-title>Systematic review on the prevalence, frequency and comparative value of adverse events data in social media</article-title>
          <source>Br J Clin Pharmacol</source>
          <year>2015</year>
          <month>10</month>
          <volume>80</volume>
          <issue>4</issue>
          <fpage>878</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26271492"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/bcp.12746</pub-id>
          <pub-id pub-id-type="medline">26271492</pub-id>
          <pub-id pub-id-type="pmcid">PMC4594731</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>EB</given-names>
            </name>
          </person-group>
          <article-title>Probable inference, the law of succession, and statistical inference</article-title>
          <source>J Am Stat Assoc</source>
          <year>1927</year>
          <month>06</month>
          <volume>22</volume>
          <issue>158</issue>
          <fpage>209</fpage>
          <lpage>212</lpage>
          <pub-id pub-id-type="doi">10.1080/01621459.1927.10502953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gilardi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Alizadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kubli</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT outperforms crowd workers for text-annotation tasks</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2023</year>
          <month>07</month>
          <day>25</day>
          <volume>120</volume>
          <issue>30</issue>
          <fpage>e2305016120</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pnas.org/doi/10.1073/pnas.2305016120?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.2305016120</pub-id>
          <pub-id pub-id-type="medline">37463210</pub-id>
          <pub-id pub-id-type="pmcid">PMC10372638</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Törnberg</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Large language models outperform expert coders and supervised classifiers at annotating political social media messages</article-title>
          <source>Soc Sci Comput Rev</source>
          <year>2025</year>
          <month>12</month>
          <volume>43</volume>
          <issue>6</issue>
          <fpage>1181</fpage>
          <lpage>1195</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/08944393241286471"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/08944393241286471</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Landis</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Koch</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>The measurement of observer agreement for categorical data</article-title>
          <source>Biometrics</source>
          <year>1977</year>
          <month>03</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>159</fpage>
          <pub-id pub-id-type="doi">10.2307/2529310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cicchetti</surname>
              <given-names>DV</given-names>
            </name>
            <name name-style="western">
              <surname>Feinstein</surname>
              <given-names>AR</given-names>
            </name>
          </person-group>
          <article-title>High agreement but low kappa: II. Resolving the paradoxes</article-title>
          <source>J Clin Epidemiol</source>
          <year>1990</year>
          <volume>43</volume>
          <issue>6</issue>
          <fpage>551</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/0895-4356(90)90159-m</pub-id>
          <pub-id pub-id-type="medline">2189948</pub-id>
          <pub-id pub-id-type="pii">0895-4356(90)90159-M</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feinstein</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Cicchetti</surname>
              <given-names>DV</given-names>
            </name>
          </person-group>
          <article-title>High agreement but low kappa: I. The problems of two paradoxes</article-title>
          <source>J Clin Epidemiol</source>
          <year>1990</year>
          <volume>43</volume>
          <issue>6</issue>
          <fpage>543</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/0895-4356(90)90158-l</pub-id>
          <pub-id pub-id-type="medline">2348207</pub-id>
          <pub-id pub-id-type="pii">0895-4356(90)90158-L</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Synthesis lectures on human language technologies</article-title>
          <source>Sentiment Analysis and Opinion Mining</source>
          <year>2012</year>
          <month>05</month>
          <day>23</day>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>1</fpage>
          <lpage>167</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pontiki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Galanis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Papageorgiou</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>SemEval-2016 Task 5: aspect based sentiment analysis</article-title>
          <year>2016</year>
          <month>06</month>
          <day>16</day>
          <conf-name>Proceedings of the 10th International Workshop on Semantic Evaluation (SemEval-2016)</conf-name>
          <conf-date>June 16-17, 2016</conf-date>
          <conf-loc>San Diego, California</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>19</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/S16-1002/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/S16-1002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Byrt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bishop</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carlin</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Bias, prevalence and kappa</article-title>
          <source>J Clin Epidemiol</source>
          <year>1993</year>
          <month>05</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>423</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/0895-4356(93)90018-v</pub-id>
          <pub-id pub-id-type="medline">8501467</pub-id>
          <pub-id pub-id-type="pii">0895-4356(93)90018-V</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Capturing the patient's perspective: a review of advances in natural language processing of health-related text</article-title>
          <source>Yearb Med Inform</source>
          <year>2017</year>
          <month>08</month>
          <volume>26</volume>
          <issue>1</issue>
          <fpage>214</fpage>
          <lpage>227</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.15265/IY-2017-029"/>
          </comment>
          <pub-id pub-id-type="doi">10.15265/IY-2017-029</pub-id>
          <pub-id pub-id-type="medline">29063568</pub-id>
          <pub-id pub-id-type="pmcid">PMC6250990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tutubalina</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Miftahutdinov</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Alimova</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dirkson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Verberne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>DeepADEMiner: a deep learning pharmacovigilance pipeline for extraction and normalization of adverse drug event mentions on Twitter</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>09</month>
          <day>18</day>
          <volume>28</volume>
          <issue>10</issue>
          <fpage>2184</fpage>
          <lpage>2192</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34270701"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab114</pub-id>
          <pub-id pub-id-type="medline">34270701</pub-id>
          <pub-id pub-id-type="pii">6322900</pub-id>
          <pub-id pub-id-type="pmcid">PMC8449608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: framework for an emerging set of public health informatics methods to analyze search, communication and publication behavior on the internet</article-title>
          <source>J Med Internet Res</source>
          <year>2009</year>
          <month>03</month>
          <day>27</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2009/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1157</pub-id>
          <pub-id pub-id-type="medline">19329408</pub-id>
          <pub-id pub-id-type="pii">v11i1e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC2762766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ginn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jayaraman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Upadhaya</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Utilizing social media data for pharmacovigilance: A review</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>04</month>
          <volume>54</volume>
          <fpage>202</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00036-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.02.004</pub-id>
          <pub-id pub-id-type="medline">25720841</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00036-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4408239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maloney-Krichmar</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Preece</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A multilevel analysis of sociability, usability, and community dynamics in an online health community</article-title>
          <source>ACM Trans Comput-Hum Interact</source>
          <year>2005</year>
          <month>06</month>
          <day>01</day>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>201</fpage>
          <lpage>232</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/1067860.1067864"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/1067860.1067864</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Madathil</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Rivera-Rodriguez</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Greenstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Gramopadhye</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Healthcare information on YouTube: a systematic review</article-title>
          <source>Health Informatics J</source>
          <year>2015</year>
          <month>09</month>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>173</fpage>
          <lpage>194</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1460458213512220?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1460458213512220</pub-id>
          <pub-id pub-id-type="medline">24670899</pub-id>
          <pub-id pub-id-type="pii">1460458213512220</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: tracking online health information and cyberbehavior for public health</article-title>
          <source>Am J Prev Med</source>
          <year>2011</year>
          <month>05</month>
          <volume>40</volume>
          <issue>5 Suppl 2</issue>
          <fpage>S154</fpage>
          <lpage>S158</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2011.02.006</pub-id>
          <pub-id pub-id-type="medline">21521589</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(11)00088-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Daumé</surname>
              <given-names>H</given-names>
              <suffix>III</suffix>
            </name>
          </person-group>
          <article-title>Frustratingly easy domain adaptation</article-title>
          <year>2007</year>
          <conf-name>Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics</conf-name>
          <conf-date>June 23-30, 2007</conf-date>
          <conf-loc>Prague, Czech Republic</conf-loc>
          <fpage>256</fpage>
          <lpage>263</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P07-1033/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noy</surname>
              <given-names>NF</given-names>
            </name>
            <name name-style="western">
              <surname>McGuinness</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Ontology development 101: a guide to creating your first ontology</article-title>
          <source>Stanford Knowledge Systems Laboratory Technical Report</source>
          <year>2001</year>
          <month>01</month>
          <day>01</day>
          <publisher-loc>Stanford, California</publisher-loc>
          <publisher-name>Stanford University</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Staudacher</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Chey</surname>
              <given-names>WD</given-names>
            </name>
            <name name-style="western">
              <surname>Whelan</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Optimal design of clinical trials of dietary interventions in disorders of gut-brain interaction</article-title>
          <source>Am J Gastroenterol</source>
          <year>2022</year>
          <month>06</month>
          <day>01</day>
          <volume>117</volume>
          <issue>6</issue>
          <fpage>973</fpage>
          <lpage>984</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35297784"/>
          </comment>
          <pub-id pub-id-type="doi">10.14309/ajg.0000000000001732</pub-id>
          <pub-id pub-id-type="medline">35297784</pub-id>
          <pub-id pub-id-type="pii">00000434-202206000-00030</pub-id>
          <pub-id pub-id-type="pmcid">PMC9169766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jim</surname>
              <given-names>HSL</given-names>
            </name>
            <name name-style="western">
              <surname>Hoogland</surname>
              <given-names>AI</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>NC</given-names>
            </name>
            <name name-style="western">
              <surname>Barata</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dicker</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Knoop</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>Brian D</given-names>
            </name>
            <name name-style="western">
              <surname>Perkins</surname>
              <given-names>Randa</given-names>
            </name>
            <name name-style="western">
              <surname>Rollison</surname>
              <given-names>Dana</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>Scott M</given-names>
            </name>
            <name name-style="western">
              <surname>Nanda</surname>
              <given-names>Ronica</given-names>
            </name>
            <name name-style="western">
              <surname>Berglund</surname>
              <given-names>Anders</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>Ross</given-names>
            </name>
            <name name-style="western">
              <surname>Johnstone</surname>
              <given-names>Peter A S</given-names>
            </name>
          </person-group>
          <article-title>Innovations in research and clinical care using patient-generated health data</article-title>
          <source>CA Cancer J Clin</source>
          <year>2020</year>
          <month>05</month>
          <volume>70</volume>
          <issue>3</issue>
          <fpage>182</fpage>
          <lpage>199</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32311776"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/caac.21608</pub-id>
          <pub-id pub-id-type="medline">32311776</pub-id>
          <pub-id pub-id-type="pmcid">PMC7488179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dyńka</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kowalcze</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Charuta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paziewska</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The ketogenic diet and cardiovascular diseases</article-title>
          <source>Nutrients</source>
          <year>2023</year>
          <month>07</month>
          <day>28</day>
          <volume>15</volume>
          <issue>15</issue>
          <fpage>3368</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=nu15153368"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/nu15153368</pub-id>
          <pub-id pub-id-type="medline">37571305</pub-id>
          <pub-id pub-id-type="pii">nu15153368</pub-id>
          <pub-id pub-id-type="pmcid">PMC10421332</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hallberg</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>McKenzie</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Bhanpuri</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Hazbun</surname>
              <given-names>Tamara L</given-names>
            </name>
            <name name-style="western">
              <surname>Volk</surname>
              <given-names>Brittanie M</given-names>
            </name>
            <name name-style="western">
              <surname>McCarter</surname>
              <given-names>James P</given-names>
            </name>
            <name name-style="western">
              <surname>Phinney</surname>
              <given-names>Stephen D</given-names>
            </name>
            <name name-style="western">
              <surname>Volek</surname>
              <given-names>Jeff S</given-names>
            </name>
          </person-group>
          <article-title>Effectiveness and safety of a novel care model for the management of type 2 diabetes at 1 year: an open-label, non-randomized, controlled study</article-title>
          <source>Diabetes Ther</source>
          <year>2018</year>
          <month>04</month>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>583</fpage>
          <lpage>612</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29417495"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13300-018-0373-9</pub-id>
          <pub-id pub-id-type="medline">29417495</pub-id>
          <pub-id pub-id-type="pii">10.1007/s13300-018-0373-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC6104272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walsh</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Cahir</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tecklenborg</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Byrne</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Culbertson</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>KE</given-names>
            </name>
          </person-group>
          <article-title>The association between medication non-adherence and adverse health outcomes in ageing populations: a systematic review and meta-analysis</article-title>
          <source>Br J Clin Pharmacol</source>
          <year>2019</year>
          <month>11</month>
          <volume>85</volume>
          <issue>11</issue>
          <fpage>2464</fpage>
          <lpage>2478</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31486099"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/bcp.14075</pub-id>
          <pub-id pub-id-type="medline">31486099</pub-id>
          <pub-id pub-id-type="pmcid">PMC6848955</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reading</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Merrill</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Converging and diverging needs between patients and providers who are collecting and using patient-generated health data: an integrative review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>06</month>
          <day>01</day>
          <volume>25</volume>
          <issue>6</issue>
          <fpage>759</fpage>
          <lpage>771</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29471330"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy006</pub-id>
          <pub-id pub-id-type="medline">29471330</pub-id>
          <pub-id pub-id-type="pii">4869761</pub-id>
          <pub-id pub-id-type="pmcid">PMC5978018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bakshy</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Messing</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Adamic</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>Political science. Exposure to ideologically diverse news and opinion on Facebook</article-title>
          <source>Science</source>
          <year>2015</year>
          <month>06</month>
          <day>05</day>
          <volume>348</volume>
          <issue>6239</issue>
          <fpage>1130</fpage>
          <lpage>1132</lpage>
          <pub-id pub-id-type="doi">10.1126/science.aaa1160</pub-id>
          <pub-id pub-id-type="medline">25953820</pub-id>
          <pub-id pub-id-type="pii">science.aaa1160</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Yexin</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Jianglai</given-names>
            </name>
          </person-group>
          <article-title>Trade-offs between machine learning and deep learning for mental illness detection on social media</article-title>
          <source>Sci Rep</source>
          <year>2025</year>
          <month>04</month>
          <day>25</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>14497</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-025-99167-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-025-99167-6</pub-id>
          <pub-id pub-id-type="medline">40281061</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-025-99167-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC12032126</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thelwall</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sud</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Vis</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Commenting on YouTube videos: from Guatemalan rock to El Big Bang</article-title>
          <source>J Am Soc Inf Sci</source>
          <year>2011</year>
          <month>11</month>
          <day>14</day>
          <volume>63</volume>
          <issue>3</issue>
          <fpage>616</fpage>
          <lpage>629</lpage>
          <pub-id pub-id-type="doi">10.1002/asi.21679</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>LLM-based annotation and token-augmented modeling for emotional tone classification in online cancer peer-support posts</article-title>
          <source>medRxiv</source>
          <year>2026</year>
          <month>01</month>
          <day>30</day>
          <fpage>1</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.64898/2026.01.27.26344999</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Giglietto</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bennato</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The open laboratory: limits and possibilities of using Facebook, Twitter, and YouTube as a research data source</article-title>
          <source>J Technol Hum Serv</source>
          <year>2012</year>
          <month>12</month>
          <day>06</day>
          <volume>30</volume>
          <issue>3-4</issue>
          <fpage>145</fpage>
          <lpage>159</lpage>
          <pub-id pub-id-type="doi">10.1080/15228835.2012.743797</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kington</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Arnesen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Curry</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lazer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Villarruel</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Identifying credible sources of health information in social media: principles and attributes</article-title>
          <source>NAM Perspect</source>
          <year>2021</year>
          <volume>2021</volume>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34611600"/>
          </comment>
          <pub-id pub-id-type="doi">10.31478/202107a</pub-id>
          <pub-id pub-id-type="medline">34611600</pub-id>
          <pub-id pub-id-type="pii">202107a</pub-id>
          <pub-id pub-id-type="pmcid">PMC8486420</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
