<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e54321</article-id>
      <article-id pub-id-type="pmid">39662896</article-id>
      <article-id pub-id-type="doi">10.2196/54321</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Combining Topic Modeling, Sentiment Analysis, and Corpus Linguistics to Analyze Unstructured Web-Based Patient Experience Data: Case Study of Modafinil Experiences</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chatzimina</surname>
            <given-names>Maria</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mahmic Kaknjo</surname>
            <given-names>Mersiha</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Walsh</surname>
            <given-names>Julia</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Warwick Medical School</institution>
            <institution>University of Warwick</institution>
            <addr-line>Gibbet Hill</addr-line>
            <addr-line>Coventry, CV4 7AL</addr-line>
            <country>United Kingdom</country>
            <phone>44 02476528009</phone>
            <email>julia.walsh@warwick.ac.uk</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9787-0349</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Cave</surname>
            <given-names>Jonathan</given-names>
          </name>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9879-6507</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Griffiths</surname>
            <given-names>Frances</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4173-1438</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Warwick Medical School</institution>
        <institution>University of Warwick</institution>
        <addr-line>Coventry</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Economics</institution>
        <institution>University of Warwick</institution>
        <addr-line>Coventry</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Centre for Health Policy</institution>
        <institution>University of the Witwatersrand</institution>
        <addr-line>Johannesburg</addr-line>
        <country>South Africa</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Julia Walsh <email>julia.walsh@warwick.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>11</day>
        <month>12</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e54321</elocation-id>
      <history>
        <date date-type="received">
          <day>6</day>
          <month>11</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>22</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>19</day>
          <month>6</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>27</day>
          <month>9</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Julia Walsh, Jonathan Cave, Frances Griffiths. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 11.12.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e54321" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Patient experience data from social media offer patient-centered perspectives on disease, treatments, and health service delivery. Current guidelines typically rely on systematic reviews, while qualitative health studies are often seen as anecdotal and nongeneralizable. This study explores combining personal health experiences from multiple sources to create generalizable evidence.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The study aims to (1) investigate how combining unsupervised natural language processing (NLP) and corpus linguistics can explore patient perspectives from a large unstructured dataset of modafinil experiences, (2) compare findings with Cochrane meta-analyses on modafinil’s effectiveness, and (3) develop a methodology for analyzing such data.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Using 69,022 posts from 790 sources, we used a variety of NLP and corpus techniques to analyze the data, including data cleaning techniques to maximize post context, Python for NLP techniques, and Sketch Engine for linguistic analysis. We used multiple topic mining approaches, such as latent Dirichlet allocation, nonnegative matrix factorization, and word-embedding methods. Sentiment analysis used TextBlob and Valence Aware Dictionary and Sentiment Reasoner, while corpus methods included collocation, concordance, and n-gram generation. Previous work had mapped topic mining to themes, such as health conditions, reasons for taking modafinil, symptom impacts, dosage, side effects, effectiveness, and treatment comparisons.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Key findings of the study included modafinil use across 166 health conditions, most frequently narcolepsy, multiple sclerosis, attention-deficit disorder, anxiety, sleep apnea, depression, bipolar disorder, chronic fatigue syndrome, fibromyalgia, and chronic disease. Word-embedding topic modeling mapped 70% of posts to predefined themes, while sentiment analysis revealed 65% positive responses, 6% neutral responses, and 28% negative responses. Notably, the perceived effectiveness of modafinil for various conditions strongly contrasts with the findings of existing randomized controlled trials and systematic reviews, which conclude insufficient or low-quality evidence of effectiveness.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study demonstrated the value of combining NLP with linguistic techniques for analyzing large unstructured text datasets. Despite varying opinions, findings were methodologically consistent and challenged existing clinical evidence. This suggests that patient-generated data could provide valuable insights into treatment outcomes, potentially improving clinical understanding and patient care.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>unstructured text</kwd>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
        <kwd>topic modeling</kwd>
        <kwd>sentiment analysis</kwd>
        <kwd>corpus linguistics</kwd>
        <kwd>social media data</kwd>
        <kwd>patient experience</kwd>
        <kwd>unsupervised</kwd>
        <kwd>modafinil</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Spontaneously generated online patient experience (SGOPE) data collected from social media platforms are a rich data source for natural language processing (NLP) tasks [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. Providing patient-centered perspectives [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>] on the posters’ experiences of disease, treatments, and health service delivery rather than the researcher-driven focus of published literature [<xref ref-type="bibr" rid="ref7">7</xref>], SGOPE data are increasingly recognized as having the potential to transform clinical care and research [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <p>Current estimates suggest that 3.6 billion people worldwide use social media, with numbers forecast to increase to 4.4 billion by 2025 [<xref ref-type="bibr" rid="ref15">15</xref>]. Social media were originally seen as being mostly used by younger people, but a 2019 US study showed that 73% of individuals aged 50 to 64 years and 45% of those aged ≥65 years used at least 1 form of social media [<xref ref-type="bibr" rid="ref16">16</xref>]. SGOPE is recognized as being able to include a wider range of demographic groups, including many who may previously have been seen as “hard to reach” [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        <p>Modafinil is an oral wakefulness-promoting drug originally developed in the 1990s that is licensed by the UK National Health Service purely for narcolepsy, although its Food and Drug Administration classification in the United States allows it to be prescribed “off-label” for a wide variety of conditions [<xref ref-type="bibr" rid="ref20">20</xref>]. Modafinil targets symptoms of fatigue seen in many clinical presentations; however, current randomized controlled trial (RCT)–based evidence regarding its efficacy for treating other conditions is inconclusive [<xref ref-type="bibr" rid="ref21">21</xref>]. Having acquired a reputation as a “study drug,” modafinil has sparked a large volume of online discussion about posters’ experiences of taking it for both therapeutic and enhancement purposes.</p>
        <p>Patient narrative is already recognized as a tool that can help patients, clinicians, and researchers [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. Containing a mix of both objective and subjective views, SGOPE data provide a unique perspective on the way that patients perceive, manage, and react to their conditions, as well as how such conditions impact their life, their treatments, or other aspects of their health [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        <p>Although evidence-based medicine has been defined as the integration of the best research evidence with real-world clinical expertise and patient values (Sackett et al [<xref ref-type="bibr" rid="ref25">25</xref>]), in reality, the pyramid-shaped hierarchy of evidence quality ensures that it is the findings from RCTs and subsequent systematic reviews, rather than any other form of knowledge, that tend to dominate and be reflected in the clinical guidelines [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>].</p>
        <p>The need for a plurality of evidence-generating methods is already recognized [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. SGOPE represents a type of data that fall under the umbrella terms of real-world data (RWD) and real-world evidence. RWD include health care data generated from sources other than conventional RCTs, while real-world evidence is defined as evidence derived from the aggregation and analysis of RWD [<xref ref-type="bibr" rid="ref32">32</xref>] and is argued to have significant advantages that can be used to supplement or augment RCT findings, including the ability to identify “clinical gaps” [<xref ref-type="bibr" rid="ref33">33</xref>], indicating the effectiveness of an intervention in the real world, on much larger populations, and much faster than can be achieved within the artificial and highly constrained confines of an RCT [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. Combining data sources such as SGOPE with new methods of analyzing unstructured data will enable the development of new and different approaches to knowledge and evidence generation.</p>
        <p>Our previous study compared a thematic qualitative analysis with an NLP-based analysis of a small number of posts related to the therapeutic use of modafinil [<xref ref-type="bibr" rid="ref21">21</xref>]. Eight main themes were identified from the posts, including details of the reasons for taking modafinil, conditions or symptoms, dosage, side effects, effectiveness, and outcomes in terms of quality of life, as well as details of other interventions whether previously tried, used concurrently, or subsequently moved on to. In this paper, we scale up this approach, using a combination of NLP and linguistic techniques to analyze a much larger dataset of modafinil experiences from a wide variety of social media platforms. We also compare the findings from some of the NLP tools used for the analysis to help future analysis of this type of data for health research.</p>
      </sec>
      <sec>
        <title>Methodology</title>
        <p>NLP approaches can be divided into 2 main types: supervised, which requires large quantities of the data to be labeled with the features of interest; and unsupervised, which uses clustering techniques that allow the data to tell their own story. Despite the development of ever-larger language models, such as GPT-3, which can be extremely resource heavy [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], there is an argument that moving nearer to the ultimate goal of natural language understanding, which is required to understand the complexity of patient experiences, entails stepping back toward combining unsupervised, rules-based methods with those from corpus linguistics [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. To replicate the inductive data–driven approach of qualitative studies, but on a much larger scale, this study uses unsupervised methods. These include varied methods of topic modeling, sentiment analysis, and linguistic analysis.</p>
        <p>Whichever approach is selected, cleaning the data is one of the most important and time-consuming components of the study. The cleaning process is specific to each project—each dataset has its own characteristics, and each project requires specific features from the dataset to answer the research question—but it is important to try to maximize the quality of the processed dataset for each subtask; for instance, in topic modeling, the aim of preprocessing is to reduce noise and incoherence from the data [<xref ref-type="bibr" rid="ref40">40</xref>], allowing the themes to emerge. Stemming and lemmatizing words to their root form enables this, whereas when assessing effectiveness, it is important to retain all relevant details to understand the nuanced context within the text. Taking too blunt an approach can result in the loss of potentially useful data.</p>
        <p>Particularly suitable for exploratory and descriptive analysis, topic modeling can be used as a method for determining what people are talking about in social media by looking for underlying structure within the text [<xref ref-type="bibr" rid="ref41">41</xref>]. Combining an inductive approach with quantitative measurement, topic modeling is a useful method for obtaining an insight into the concepts that are contained within documents in a similar manner to grounded theory [<xref ref-type="bibr" rid="ref42">42</xref>], although it is not yet widely used in clinical NLP [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
        <p>Sentiment analysis is a well-known and widely used technique within NLP that analyzes text for positive, neutral, or negative sentiment or emotion, aiming to extract an understanding of the meaning, mood, context, and intent. It has already been shown to be capable of reasonable agreement with online comments, including those rated using a Likert scale [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        <p>Causation is central to health care, both in understanding the onset of diseases or symptoms and the effectiveness of interventions or management strategies used to treat them [<xref ref-type="bibr" rid="ref45">45</xref>]. Showing causation in health care using non-RCT data has been viewed as problematic. At both structural and cultural levels, causation is generally seen as something that can only be shown in empirical settings such as RCTs, where all confounding factors are controlled for, and the Humean principle of “same cause, same effect” can be repeatedly shown [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>].</p>
        <p>Causal dispositionalism is an alternative approach to causation, which may be relevant to this type of data. This takes a more nuanced view of how the characteristics or dispositions of both the intervention and the individual combine in complex ways to affect the effectiveness [<xref ref-type="bibr" rid="ref48">48</xref>]. It suggests that population-level health research should be only 1 part of the evidence-generation process, and that it is listening to the patient narrative that can be the key to understanding their individual health needs [<xref ref-type="bibr" rid="ref47">47</xref>]. One of the strengths of narrative data, such as SGOPE data, is that they enable both author and reader to make sense of the interplay of actions and contexts in the text in a way that conveys perceived causality [<xref ref-type="bibr" rid="ref22">22</xref>]. The mantra “correlation does not equal causation” is justifiably used, but that leaves the question of how it is possible to determine causation.</p>
        <p>Causation can be defined as a relation between 2 events: a cause event and its consequence. The cause must precede the consequence and is counterfactual in that the consequence would not have occurred without the cause. While this sounds quite logical and straightforward, causation theories are not necessarily definitive explanations of how events occur but rather represent how humans make sense of, and understand, the world [<xref ref-type="bibr" rid="ref49">49</xref>]. Williamson [<xref ref-type="bibr" rid="ref50">50</xref>] argues that causation can be shown by identifying or understanding the underlying mechanism between a correlated cause and effect.</p>
        <p>NLP methods still struggle with identifying potential causality; therefore, we used linguistic analysis to aid in this process. The language used to describe cause and effect can be crucial to understanding the semantic meaning of a text but is not always easy to identify. One method involves using transition words that link a reason to a consequence or indicate a sequence of events (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>).</p>
        <boxed-text id="box1" position="float">
          <title>Examples of text that indicate sequential events.</title>
          <p>
            <bold>Transition words</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>firstly</p>
            </list-item>
            <list-item>
              <p>to begin with</p>
            </list-item>
            <list-item>
              <p>next</p>
            </list-item>
            <list-item>
              <p>then following this</p>
            </list-item>
            <list-item>
              <p>at this time</p>
            </list-item>
            <list-item>
              <p>now</p>
            </list-item>
            <list-item>
              <p>at this point</p>
            </list-item>
            <list-item>
              <p>previously</p>
            </list-item>
            <list-item>
              <p>before this</p>
            </list-item>
            <list-item>
              <p>after</p>
            </list-item>
            <list-item>
              <p>afterward</p>
            </list-item>
            <list-item>
              <p>subsequently</p>
            </list-item>
            <list-item>
              <p>finally</p>
            </list-item>
            <list-item>
              <p>at last</p>
            </list-item>
            <list-item>
              <p>simultaneously</p>
            </list-item>
            <list-item>
              <p>meanwhile</p>
            </list-item>
          </list>
        </boxed-text>
        <p>Traditionally, findings from health-based qualitative studies have been seen as anecdotal, unrepresentative, and not generalizable across populations [<xref ref-type="bibr" rid="ref51">51</xref>]. This study examines how we can move toward combining personal evidence of a health effect from sufficient numbers of people to the point where it could be generalized and added to existing population-level evidence [<xref ref-type="bibr" rid="ref47">47</xref>].</p>
        <p>The aim of this study was to assess what can be learnt from an NLP-based analysis of a large quantity of unstructured SGOPE data. This can be broken down into 5 subquestions:</p>
        <list list-type="bullet">
          <list-item>
            <p>To assess whether topic mining can elicit the themes that are contained in the data</p>
          </list-item>
          <list-item>
            <p>To explore how sentiment analysis can be used to assess perceived effectiveness</p>
          </list-item>
          <list-item>
            <p>To compare various methods of theme and effectiveness identification</p>
          </list-item>
          <list-item>
            <p>To assess whether linguistic analysis can identify perceived causality from the text</p>
          </list-item>
          <list-item>
            <p>To establish whether these techniques can be used to develop a methodology for this type of analysis</p>
          </list-item>
        </list>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>The dataset contained 69,022 publicly available social media posts and threads that included the terms <italic>modafinil</italic>, <italic>provigil</italic>, <italic>armodafinil</italic>, or <italic>nuvigil</italic> as of July 2017. The dataset was supplied by Treato Ltd, which was a web-based social media data mining service that collected publicly available health-related posts (ie, posts viewable by anyone without requiring log-in) from &#62;10,000 global blogs and online forums. The company agreed to supply English-language data relating to modafinil use, using its own proprietary algorithms based on the Unified Medical Language System to create a searchable dataset that can be analyzed in aggregate [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
        <p>Analysis code was developed using Python (version 3.8.5; Python Software Foundation) [<xref ref-type="bibr" rid="ref53">53</xref>] in JupyterLab (version 3.0.15; Project Jupyter) [<xref ref-type="bibr" rid="ref54">54</xref>]. Bearing in mind the need to retain as much context to the data as possible, as described in the Methodology subsection, we took a staged approach to data cleaning, initially performing a minimal level of transformation and parsing of fields. The time stamp field, originally formatted as <italic>2011-01-01 00:00:00 UTC</italic>, was simplified to <italic>PostYear</italic> to represent the year the post was published. Line breaks, paragraph breaks, and other extra spaces were removed. The URL field was parsed to identify the main website or forum name. New fields were created for subsite names. Having extracted the site name, it became obvious that many of the URLs contained either the name of the condition that was of primary interest to the poster or the title of the thread or question that they were referring to. Using clustering techniques, we were able to group and extract this detail from the URL. Three new fields were created to represent the second-level domain name, the site’s focus condition (if applicable), and the extracted thread titles. To maximize the options for analysis, the cleaned data were structured to include 3 additional fields: <italic>TextOnly</italic> (response only), <italic>Title</italic> (thread title), and <italic>TextWithTitle</italic> (thread title preceding each response). All references to dosage amount in <italic>mg</italic> were standardized to <italic>xxxmg</italic>. Exact duplicate posts and obvious spam posts were removed. After data deduplication and spam removal, all forms of author identification were removed. The restructured file was saved in CSV format for the next stages. 
The <italic>TextOnly</italic> and <italic>Title</italic> fields were exported as 2 separate corpora text files for linguistic analysis. Keeping them distinct avoided the possibility of the repetition of the title words skewing any frequency-based analysis. These steps enabled us to obtain a dataset that retained an optimal level of quality and flexibility and upon which further preprocessing could be performed specific to the individual task.</p>
      </sec>
      <sec>
        <title>Topic Modeling to Identify Themes</title>
        <p>Topic modeling was the main method for theme detection. On the basis of a previous study that evaluated 4 of the most widely used bag-of-words topic modeling methods [<xref ref-type="bibr" rid="ref55">55</xref>], we selected latent Dirichlet allocation (LDA) and nonnegative matrix factorization (NMF) for comparison because they were seen to deliver the most meaningful extracted topics. Both LDA and NMF use the bag-of-words approach, which disregards any order within the corpus and uses word frequency to generate topics. Although the LDA method has been the most widely used method for patient experience feedback [<xref ref-type="bibr" rid="ref56">56</xref>], a previous study found that NMF yields better results than LDA when used for short texts [<xref ref-type="bibr" rid="ref57">57</xref>]. Other comparisons between the 2 methods found that LDA output was more semantically interpretable with more distinct categories [<xref ref-type="bibr" rid="ref58">58</xref>], while NMF was faster and therefore less resource intensive [<xref ref-type="bibr" rid="ref59">59</xref>]. However, another comparison found the opposite [<xref ref-type="bibr" rid="ref60">60</xref>]. Yet another study suggested that NMF returned higher quality topics than LDA on smaller datasets [<xref ref-type="bibr" rid="ref61">61</xref>]. As part of the project involves identifying a methodology for this type of data that can be developed for use on other datasets, we compared the findings of both methods using the <italic>gensim</italic> (version 3.8.3) [<xref ref-type="bibr" rid="ref62">62</xref>] and <italic>sklearn</italic> (version 0.23.1) [<xref ref-type="bibr" rid="ref63">63</xref>] libraries as they relate to SGOPE data. Another package—Top2Vec (version 1.0.24) [<xref ref-type="bibr" rid="ref64">64</xref>]—using word-embedding methods was released during the study and was included for comparison. 
Word-embedding methods work by considering each word in the context of its neighbors, creating a numeric vector where words with similar meanings are grouped together, which has been seen as a significant advance in trying to establish the meaning or topics of posts [<xref ref-type="bibr" rid="ref65">65</xref>].</p>
        <p>Additional preprocessing for the LDA and NMF methods included removing stop words and punctuation and converting all text to lowercase. The stop word list was extended to include common name variations for modafinil. Bigrams and trigrams were generated; text tokens were lemmatized; and part-of-speech (POS) tags relating to nouns, adjectives, verbs, and adverbs were retained. Coherence and perplexity values were generated to help assess the performance of each model. The LDA outputs included generating the 10 most discriminative words for each topic; the weighting of each word within the allocated topic; and, for the <italic>gensim</italic> LDA model, a computer-based visualization (pyLDAvis [version 2.1.2]) that demonstrated the words for each topic and the degree of overlap between topics [<xref ref-type="bibr" rid="ref66">66</xref>]. This visualization could also be used to show varying values of alpha and beta, the balance between words per topic and topics per document.</p>
        <p>For the embedding-based method, no preprocessing of the text or prespecified number of topics was required because the Top2Vec algorithm calculates the number of topics contained within the corpus.</p>
      </sec>
      <sec>
        <title>Sentiment Analysis to Evaluate Effectiveness</title>
        <p>Two widely used lexicon-based methods—TextBlob (version 0.15.3) [<xref ref-type="bibr" rid="ref67">67</xref>] and Valence Aware Dictionary and Sentiment Reasoner (VADER; version 3.3.2) [<xref ref-type="bibr" rid="ref68">68</xref>]—were compared and the strengths and limitations of both identified. The original cleaned <italic>TextOnly</italic> field was selected for the sentiment analysis because this contained only the responses to the posts. Word counts were calculated for each post. Capitalization, punctuation, and stop words were retained for this part of the analysis because each can contribute meaning or intensity to the analysis. TextBlob [<xref ref-type="bibr" rid="ref67">67</xref>] calculates values for polarity and subjectivity for each post. The lexicon it uses derives from a separate library in the Natural Language Toolkit. It focuses on adjectives from customer product reviews that have been tagged by humans for polarity and subjectivity. Subjectivity analysis assesses how objective or subjective the text is, whereas polarity classification determines whether the text is positive, negative, or neutral. It uses the sentiment lexicon to assign scores for polarity and subjectivity for each word, which are then averaged out using a weighted average to provide an overall sentence sentiment score. Basic statistics were generated for both values, and the numerical polarity score was converted to categorical values of positive (&#62;0), neutral (0), and negative (&#60;0). Plots showing the distribution and the relationship between the polarity and subjectivity scores were generated.</p>
        <p>The methods behind the design of the VADER library make it possibly a better choice for sentiment analysis of social media–type posts than TextBlob [<xref ref-type="bibr" rid="ref69">69</xref>]. Rather than calculating the polarity and subjectivity of a post, it scores each post on 4 aspects: positive, negative, neutral, and compound. The positive, negative, and neutral scores represent the proportions of the post that fall in these categories. The compound score is calculated from the other 3 scores, normalized to a value between –1 and 1, and represents the overall sentiment of the post [<xref ref-type="bibr" rid="ref68">68</xref>]. The lexicon VADER uses is based on general language rather than reviews [<xref ref-type="bibr" rid="ref70">70</xref>] and contains approximately 7500 words.</p>
        <p>Although the basic sentiment is calculated on the individual words, VADER looks at the whole text and can take negations into account [<xref ref-type="bibr" rid="ref71">71</xref>]. This can help to give a balanced assessment when the post contains contradictory words out of context. This approach is intended to take into account some of the characteristics often seen in SGOPE data where features such as repeated punctuation or capital letters can be used to signify stronger sentiment [<xref ref-type="bibr" rid="ref68">68</xref>].</p>
        <p>The VADER lexicon is easily modified. After reviewing the positive and negative words it had identified from a sample of posts at each end of the sentiment spectrum, we modified the lexicon, removing the positive words <italic>credit</italic>, <italic>free</italic>, <italic>accepted</italic>, and <italic>approval</italic> because these words were frequently included in spam posts. We also added frequently mentioned effects to the negative lexicon, including <italic>headache</italic>, <italic>jittery</italic>, <italic>rash</italic>, <italic>tired</italic>, <italic>harmful</italic>, <italic>disappointed</italic>, <italic>sleepy</italic>, <italic>nightmare</italic>, and <italic>intolerable</italic>. In addition, we modified the positive lexicon to include <italic>awake</italic>, <italic>focus</italic>, <italic>concentrate</italic>, <italic>normal</italic>, <italic>productive</italic>, <italic>helped</italic>, <italic>grateful</italic>, <italic>miracle</italic>, and <italic>lifesaver</italic>.</p>
        <p>The results from each method were then compared against each other.</p>
      </sec>
      <sec>
        <title>Linguistic Analysis</title>
        <p>We extracted the narrative fields from each post to form a corpus, which was then imported into Sketch Engine [<xref ref-type="bibr" rid="ref72">72</xref>], a corpus linguistics tool. Each token was assigned a POS tag from the English TreeTagger POS tagset with Sketch Engine modifications [<xref ref-type="bibr" rid="ref72">72</xref>]. Using the English Web corpus 2020 as a reference corpus [<xref ref-type="bibr" rid="ref73">73</xref>], we generated lists of the top 1000 keywords, key terms, and n-grams specific to the dataset to help identify both themes and examples of causal text. N-grams are sequences of words, numbers, or symbols that appear in a specific order within the text and are helpful in identifying commonly used phrases of up to <italic>n</italic> words within the corpus [<xref ref-type="bibr" rid="ref74">74</xref>]. For each word or term in the lists, we recorded its frequency in the focus corpus, the number of posts it appeared in, and a calculated score based on its relative frequency in each corpus. We then classified the top 100 highest-scoring keywords and key terms into themes and summarized the results to see how this technique compared to the topic modeling. N-grams that indicated a possible cause and effect or temporal dimension were identified. Combining these selected n-grams with concordance techniques revealed specific relevant sentences that expressed the poster’s understanding of these sequential events.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Ethics approval for the study was granted by the University of Warwick (BSREC Ref 11/19-20) in October 2019. No personally identifiable information other than the online “user handle” was included in the data collection, and this was removed and replaced with a unique ID for each post as part of the cleaning and preparation process.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Descriptive</title>
        <p>The cleaned dataset contained 68,559 records from a 6-year period (2011-2016). A total of 790 unique top-level sites were identified, with the number of posts per site ranging from 25,355 to 1. Reddit was the largest overall source, with 36.98% (25,355/68,559) of the posts from 213 subreddits, each of which represents a separate community. Of the 213 subreddits, 5 (2.3%) contributed &#62;1000 posts, with the largest being the <italic>afinil</italic> subreddit (n=12,870, 18.77% posts). Post lengths ranged from 1 to 1577 (mean 100.4, SD 100.86; IQR 34-132) words. The <italic>TextOnly</italic> field comprised 7.99 million tokens, 6.84 million words, 104,565 unique words, and 388,516 sentences. Parsing the site or forum URLs revealed 166 separate health conditions. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows analysis by the number of posts posted to the top 10 condition-specific sites. This does not assume that the specified condition was the primary or sole condition of the poster but rather reflects the poster’s choice in selecting where to post their contribution.</p>
      </sec>
      <sec>
        <title>Topic Modeling</title>
        <sec>
          <title>Overview</title>
          <p>First, using the <italic>gensim</italic> LDA library, initial parameters were set to 8 topics (as per the earlier themes identified [<xref ref-type="bibr" rid="ref21">21</xref>]) and 50 iterations. The default output is the top 10 words per topic, together with the weighting of each word within the topic. Although the returned topic word lists could all be seen to relate to the poster’s experience, they did not seem to be clearly distinguishable from each other. The visualization (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) indicates a substantial overlap of topics 1 to 4, which between them represented 72.7% (49,842/68,559) of the tokens.</p>
          <p>Coherence model testing (<xref rid="figure1" ref-type="fig">Figure 1</xref>) using the NMF method (range 5-50) suggested that the optimal number of topics was 27; therefore, we ran the model again with varying numbers of iterations across the data.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Coherence testing model (range 5-50).</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e54321_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Gensim LDA</title>
          <p>Running the LDA model with parameters of 27 topics and 200 passes (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>) showed a clearer distribution of topics, but there was still a substantial degree of overlap of topics 1 to 6. Increasing the number of passes to 1000 did not seem to significantly improve the visual evaluation (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), although it took approximately 5 times as long to run.</p>
          <p>Although both visualizations show some distinct topic circles that are not overlapped by others, the categorization of the topics into themes was not possible because most of them could have multiple interpretations. The top 10 topic words for each of the 27 topic models and the attempted mapping are shown in <xref rid="figure2" ref-type="fig">Figures 2</xref> and <xref rid="figure3" ref-type="fig">3</xref>.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Latent Dirichlet allocation model: 27 topics.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e54321_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>In terms of the processing load, the timings of the <italic>gensim</italic> LDA models were impacted far more by the number of iterations through the data than the number of topics selected, with the simplest configuration—8 topics and 50 iterations—taking 32 minutes, 27 topics and 200 iterations taking 2 hours 16 minutes, and 27 topics and 1000 iterations taking 11 hours and 6 minutes. Adjusting the memory handling parameters reduced the processing time significantly (13 min, 1 h 44 min, and 8 h 13 min, respectively) but gave the highest coherence score to a model with just 2 topics and 10 passes, which did not seem a plausible result.</p>
        </sec>
        <sec>
          <title>Sklearn LDA and NMF Methods</title>
          <p>Running the same 27-topic model with the <italic>sklearn</italic> library enabled a direct comparison of the LDA and NMF methods. <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> presents a comparison of the top 10 words per topic and the number of posts each model classified as belonging to each topic, together with the percentage of the corpus per topic in descending order for each method. It also includes our evaluation of the theme that the topic words most closely indicated. As with the earlier <italic>gensim</italic> LDA models, trying to map each of the returned topic word lists to the identified themes was complicated by the degree of overlap in most of the lists. The bar graphs (<xref rid="figure2" ref-type="fig">Figures 2</xref> and <xref rid="figure3" ref-type="fig">3</xref>) show that the NMF method returned topics that were distributed slightly more evenly throughout the corpus, whereas the LDA version identified some topics that were much less represented. The <italic>sklearn</italic> LDA model allocated 94.45% (64,753/68,559) of the posts to just 8 (30%) of the 27 topics; the remaining 19 (70%) topics each represented &#60;1% (3806/68,559) of the posts. In comparison, the largest NMF topic was assigned to 16.6% (11,381/68,559) of the posts, with the remaining 26 ranging from 5.4% (3702/68,559) to 2% (1371/68,559) of the posts. Future work could look at going back to the posts included in some of the smaller topics to assess their relevance to the research question.</p>
          <p>Mapping the topics found by both models, even at a superficial level, to distinct themes was problematic. For the <italic>sklearn</italic> LDA model, only 26% (7/27) of the topics could be mapped to the general themes. The NMF model was slightly more interpretable with 52% (14/27) of the topics that could be seen as relating to themes.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Nonnegative matrix factorization model: 27 topics.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e54321_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Top2Vec Library</title>
          <p>The Top2Vec library demonstrated substantially faster performance compared to the LDA method. By default, it returns the number of detected topics, the top 50 words per topic, and the number of posts per topic. The optimal DeepLearn parameter took 2 hours 15 minutes to generate 367 topics from the dataset, while the Learn parameter took 19 minutes to generate 566 topics.</p>
          <p>The results from the DeepLearn model were used for analysis. The percentage of posts per topic ranged from 2.94% (2017/68,559) in the largest group to 0.07% (45/68,559) in the smallest. Overall, 70% (257/367) of the topics could be mapped to either the P1 themes or the codes used during the thematic analysis. The P1 study refers to the previous part of the study where we compared a sample of 260 posts using a qualitative analysis with a basic NLP or corpus [<xref ref-type="bibr" rid="ref21">21</xref>]. In total, 186 (50.7%) of the 367 topics representing 38,637 (56.36%) of the 68,559 posts could be mapped to the P1 themes. A further 71 (19.3%) of the 367 topics representing 15,557 (22.69%) of the 68,559 posts were mapped to the codes.</p>
          <p>In total, 110 (30%) of the 367 topics representing 14,345 (20.92%) of the 68,559 posts were initially categorized as being uninterpretable without taking a deeper look at the specific posts. Of the 367 topics, 31 (8.4%; 3913/68,559, 5.7% posts) combined multiple themes and were classed as mixed; 50 (13.6%; 7019/68,559, 10.24% posts) were uninterpretable and were labeled unclear; and 29 (7.9%; 3413/68,559, 5% posts) contained words indicating that the topics related to possible spam posts.</p>
        </sec>
      </sec>
      <sec>
        <title>Sentiment Analysis</title>
        <p>The TextBlob library returns values for both polarity and subjectivity. Of the 68,559 posts, the initial results for polarity were as follows: 47,282 (69%) positive, 6229 (9.09%) neutral, and 15,048 (21.95%) negative. The polarity scores extended across the whole range from −1 to +1 (mean +0.1003). The subjectivity scores also covered the entire range from 0 to +1 (mean +0.4638).</p>
        <p>Using the previously mentioned parameters of positive (&#62;0), neutral (0), and negative (&#60;0), the initial results returned from the standard VADER analysis were 64.03% (43,898/68,559) positive, 6.7% (4592/68,559) neutral, and 29.27% (20,070/68,559) negative. Modifying the lexicon yielded the following results: 65.07% (44,610/68,559) positive, 6.44% (4417/68,559) neutral, and 28.49% (19,533/68,559) negative. The compound score values ranged from −0.9991 to +0.9997 (mean +0.2825). The distribution is shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Basic statistics for the extended VADER analysis (n=68,559).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="120"/>
            <col width="170"/>
            <col width="230"/>
            <col width="230"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Compound</td>
                <td>Positive</td>
                <td>Neutral</td>
                <td>Negative</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Scores, mean (SD; min-max)</td>
                <td>0.28250790 (0.61562543; –0.99910000 to 0.99970000)</td>
                <td>0.11785168 (0.09204523; 0.00000000-1.00000000)</td>
                <td>0.81442440 (0.10185110; 0.00000000-1.00000000)</td>
                <td>0.06772396 (0.06403353; 0.00000000-0.67000000)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Percentile values</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>25%</td>
                <td>–0.1779</td>
                <td>0.0590</td>
                <td>0.7590</td>
                <td>0.0120</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>50%</td>
                <td>0.4515</td>
                <td>0.1070</td>
                <td>0.8200</td>
                <td>0.0580</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>75%</td>
                <td>0.8407</td>
                <td>0.1600</td>
                <td>0.8760</td>
                <td>0.1010</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Although the results from both VADER and TextBlob methods were similar, with both showing a majority of posts being assessed as positive, comparing the distribution shape of the sentiment values between the methods showed distinct differences. Both are skewed toward the right, indicating the positive mean value; however, TextBlob showed a normal type of distribution of polarity apart from those posts classified as neutral, whereas VADER showed a similar peak at 0 but seemed to assess more of the posts as being at the extremes of the available range (<xref rid="figure4" ref-type="fig">Figure 4</xref>).</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>TextBlob and Valence Aware Dictionary and Sentiment Reasoner (VADER) distributions.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e54321_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The average word count of the 10 highest-rated posts based on the VADER analysis was 704, and that of the lowest-rated posts was 1095. For TextBlob, the average word count of the 10 highest-rated posts was 39, and that of the lowest-rated posts was 23. VADER is reported as performing better on short texts [<xref ref-type="bibr" rid="ref68">68</xref>]. The P3 dataset (total 68,559 posts) contained 1232 posts with a word count of &#62;400 and 8496 posts longer than 200 words. However, running VADER again on the reduced datasets showed little difference in the percentages of posts rated in each category (<xref ref-type="table" rid="table2">Table 2</xref>; <xref rid="figure5" ref-type="fig">Figure 5</xref>).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Valence Aware Dictionary and Sentiment Reasoner results from limiting post length.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td/>
                <td>All posts, standard (n=68,559)</td>
                <td>All posts, extended (n=68,559)</td>
                <td>&#60;400 words, extended (n=67,327)</td>
                <td>&#60;200 words, extended (n=60,063)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Compound scores, mean (SD; IQR)</td>
                <td>0.2658 (0.613580; –0.2040 to +0.8250)</td>
                <td>0.2819 (0.61587235; –0.1794 to +0.8404)</td>
                <td>0.2816 (0.609968; –0.1779 to +0.8438)</td>
                <td>0.2658 (0.587878; –0.1655 to +0.7984)</td>
              </tr>
              <tr valign="top">
                <td>Positive, n (%)</td>
                <td>43,898 (64.03)</td>
                <td>44,586 (65.03)</td>
                <td>43,781 (65.03)</td>
                <td>38,546 (64.18)</td>
              </tr>
              <tr valign="top">
                <td>Neutral, n (%)</td>
                <td>4592 (6.70)</td>
                <td>4416 (6.44)</td>
                <td>4416 (6.56)</td>
                <td>4414 (7.35)</td>
              </tr>
              <tr valign="top">
                <td>Negative, n (%)</td>
                <td>20,070 (29.27)</td>
                <td>19,557 (28.53)</td>
                <td>19,130 (28.41)</td>
                <td>17,103 (28.48)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>The impact of word count on sentiment. VADER: Valence Aware Dictionary and Sentiment Reasoner.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e54321_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Corpus Linguistics</title>
        <p>Using the corpus linguistic tool Sketch Engine, we generated 1000 key n-grams specific to the SGOPE corpus, identifying many phrases that could suggest a form of causality. Attempting to map these key n-grams to the individual themes was problematic. Unlike the key words and terms, only 16 (16%) of the top 100 n-grams specific to the corpus could be directly mapped to themes. A full analysis would require looking at the n-grams in the context of the post. However, the key n-grams are helpful in detecting expressions of causality. Unlike the individual words, all of which have a POS tag that can indicate tense, n-grams are combinations of words. It was possible to label many of them as relating to past, present, or future tense or as indicating possible belief. Examples are shown in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Key n-grams indicating possible belief.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="170"/>
            <col width="250"/>
            <col width="240"/>
            <col width="110"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td>Key n-gram</td>
                <td>Frequency (n=68,559), n</td>
                <td>Total number of documents including the phrase (n=68,559), n</td>
                <td>Score (relative frequency compared to the reference corpus)</td>
                <td>Theme</td>
                <td>Tense</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>keep me awake</td>
                <td>406</td>
                <td>396</td>
                <td>50.0</td>
                <td>Effect</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>works for me</td>
                <td>408</td>
                <td>398</td>
                <td>49.2</td>
                <td>—<sup>a</sup></td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i have found</td>
                <td>458</td>
                <td>440</td>
                <td>48.8</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>but it does</td>
                <td>488</td>
                <td>485</td>
                <td>48.3</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i find that</td>
                <td>403</td>
                <td>388</td>
                <td>46.1</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>was able to</td>
                <td>610</td>
                <td>579</td>
                <td>46.0</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>that i can</td>
                <td>474</td>
                <td>460</td>
                <td>45.4</td>
                <td>Outcome</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i felt like</td>
                <td>396</td>
                <td>377</td>
                <td>45.2</td>
                <td>Effect</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>i find it</td>
                <td>407</td>
                <td>400</td>
                <td>44.4</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>gave me a</td>
                <td>395</td>
                <td>389</td>
                <td>44.3</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>in my experience</td>
                <td>377</td>
                <td>365</td>
                <td>43.8</td>
                <td>—</td>
                <td>N/A<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>because i have</td>
                <td>381</td>
                <td>380</td>
                <td>42.6</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>because i was</td>
                <td>377</td>
                <td>363</td>
                <td>41.2</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>because of the</td>
                <td>576</td>
                <td>561</td>
                <td>34.7</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>and i think</td>
                <td>368</td>
                <td>363</td>
                <td>32.3</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>in my opinion</td>
                <td>301</td>
                <td>293</td>
                <td>29.7</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>and it seems</td>
                <td>258</td>
                <td>257</td>
                <td>29.7</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i have noticed</td>
                <td>242</td>
                <td>235</td>
                <td>29.1</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>but i feel</td>
                <td>241</td>
                <td>237</td>
                <td>28.7</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>it gives me</td>
                <td>230</td>
                <td>226</td>
                <td>27.9</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>to kick in</td>
                <td>225</td>
                <td>217</td>
                <td>27.9</td>
                <td>Effect</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>seems to work</td>
                <td>229</td>
                <td>226</td>
                <td>27.6</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>it seems to be</td>
                <td>237</td>
                <td>237</td>
                <td>27.4</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>has helped me</td>
                <td>225</td>
                <td>219</td>
                <td>27.2</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>because i do</td>
                <td>236</td>
                <td>233</td>
                <td>27.1</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>effect on me</td>
                <td>216</td>
                <td>212</td>
                <td>26.9</td>
                <td>Effect</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>me feel like</td>
                <td>220</td>
                <td>216</td>
                <td>26.9</td>
                <td>Effect</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>it gave me</td>
                <td>218</td>
                <td>213</td>
                <td>26.7</td>
                <td>Effect</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>changed my life</td>
                <td>216</td>
                <td>209</td>
                <td>26.5</td>
                <td>Outcome</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>but it seems</td>
                <td>231</td>
                <td>231</td>
                <td>26.3</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>gives me a</td>
                <td>216</td>
                <td>210</td>
                <td>26.3</td>
                <td>Effect</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>think it is</td>
                <td>255</td>
                <td>247</td>
                <td>26.3</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>as soon as i</td>
                <td>227</td>
                <td>223</td>
                <td>25.9</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i can say</td>
                <td>229</td>
                <td>218</td>
                <td>25.6</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>it does help</td>
                <td>205</td>
                <td>204</td>
                <td>25.6</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>for me is</td>
                <td>212</td>
                <td>208</td>
                <td>25.5</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i still feel</td>
                <td>206</td>
                <td>200</td>
                <td>25.4</td>
                <td>Effect</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>my experience with</td>
                <td>204</td>
                <td>201</td>
                <td>25.0</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>and i know</td>
                <td>228</td>
                <td>225</td>
                <td>24.8</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>thought i was</td>
                <td>211</td>
                <td>204</td>
                <td>24.7</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>thought it was</td>
                <td>237</td>
                <td>233</td>
                <td>24.6</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>and it helps</td>
                <td>196</td>
                <td>194</td>
                <td>24.4</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>know if i</td>
                <td>208</td>
                <td>204</td>
                <td>24.3</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>i felt like i</td>
                <td>198</td>
                <td>188</td>
                <td>24.1</td>
                <td>Effect</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>i found it</td>
                <td>209</td>
                <td>202</td>
                <td>24.0</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>i thought it</td>
                <td>229</td>
                <td>227</td>
                <td>23.9</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>seems to have</td>
                <td>242</td>
                <td>234</td>
                <td>23.5</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>it helps with</td>
                <td>185</td>
                <td>183</td>
                <td>23.2</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>it has helped</td>
                <td>187</td>
                <td>185</td>
                <td>23.2</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>it seems that</td>
                <td>232</td>
                <td>227</td>
                <td>23.2</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i know this</td>
                <td>200</td>
                <td>197</td>
                <td>23.2</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>feel like it</td>
                <td>190</td>
                <td>186</td>
                <td>22.9</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>because of my</td>
                <td>191</td>
                <td>188</td>
                <td>22.9</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>am able to</td>
                <td>189</td>
                <td>178</td>
                <td>22.9</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>great for me</td>
                <td>182</td>
                <td>182</td>
                <td>22.8</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>i can sleep</td>
                <td>181</td>
                <td>177</td>
                <td>22.8</td>
                <td>Effect</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i started to</td>
                <td>197</td>
                <td>186</td>
                <td>22.8</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>and it worked</td>
                <td>186</td>
                <td>186</td>
                <td>22.7</td>
                <td>Effect</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>have found that</td>
                <td>198</td>
                <td>195</td>
                <td>22.7</td>
                <td>—</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>give you a</td>
                <td>228</td>
                <td>226</td>
                <td>22.7</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>and i felt</td>
                <td>188</td>
                <td>184</td>
                <td>22.6</td>
                <td>Effect</td>
                <td>Past</td>
              </tr>
              <tr valign="top">
                <td>it wears off</td>
                <td>176</td>
                <td>172</td>
                <td>22.2</td>
                <td>Dosage</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>a huge difference</td>
                <td>183</td>
                <td>180</td>
                <td>22.2</td>
                <td>Effect</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>better for me</td>
                <td>177</td>
                <td>176</td>
                <td>22.2</td>
                <td>—</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>this is a</td>
                <td>642</td>
                <td>627</td>
                <td>22.2</td>
                <td>—</td>
                <td>Present</td>
              </tr>
              <tr valign="top">
                <td>i found out</td>
                <td>187</td>
                <td>181</td>
                <td>21.7</td>
                <td>—</td>
                <td>Past</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Could not be mapped.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The n-gram “have found that” was shown to be indicative of causal expression in the exploratory study [<xref ref-type="bibr" rid="ref21">21</xref>]. Using it on the P3 dataset and filtering out any of the sentences that did not explicitly mention modafinil or one of its name variants in the concordance sentence returned the examples presented in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>.</p>
        <boxed-text id="box2" position="float">
          <title>Concordance examples for the n-gram “have found that.”</title>
          <p>
            <bold>Tolerance</bold>
          </p>
          <p>• “I have been on Nuvigil for about 2 years now, and I have found that I have to skip my medication at least one day per week in order to not lose its effectiveness.” [Post ID 6289]</p>
          <p>
            <bold>Side effects</bold>
          </p>
          <p>• “I have found that I get visuals from modafinil anyways, for the first few hours of it’s effects I have mild visuals and a solid body load.” [Post ID 7711]</p>
          <p>• “After taking modafinil 200mg next day i have found that i have a skin rash on the right hand and itchy skin on both hands.” [Post ID 26,660]</p>
          <p>
            <bold>Dosage</bold>
          </p>
          <p>• “Forgetting and False Memories I am on Nuvigil, and I have found that I become a ‘zombie’ when they have my dosage too high.” [Post ID 29,323]</p>
          <p>
            <bold>Other Intervention, but effect or outcome</bold>
          </p>
          <p>• “I have found that I have been able to reduce my Prozac dosage while taking Provigil.” [Post ID 53,387]</p>
          <p>
            <bold>Outcome</bold>
          </p>
          <p>• “I also have found that I am much more confident since started on provigil (200mg/day).” [Post ID 59,900]</p>
          <p>
            <bold>Comparison</bold>
          </p>
          <p>• “I have tried Adderall and Provigil and have found that I prefer a sister drug to the Provigil called Nuvigil, but my insurance company won’t pay for it so I’m stuck with the Provigil or Adderall.” [Post ID 67,037]</p>
        </boxed-text>
        <p>The word sketch tool can be used to demonstrate the context of how any word or phrase is used within the corpus. Many of the key n-grams for this corpus relate to an observation the poster has made or an effect they have noticed in relation to the subject of their post. The most frequent key n-gram in the corpus is “in the morning,” which appears 3016 times in 2627 posts. Using the corpus query language to filter down to only those concordances that included modafinil in the same sentence returned 183 examples of dosage patterns, amounts, drug combinations, timing advice, and effects. As with the P1 study, posters reported how the standard dose can be excessive for some people [<xref ref-type="bibr" rid="ref21">21</xref>]:</p>
        <p>...my Dr prescribed starting dose of 200mg modafinil...once in the morning...with the instruction that if the200mg did not keep me awake that I should double the dose to 400mg once a day in the a.m...the 200mg was too much all at once...all it did was enhance the side effects to the point that I wasn’t able to notice if the medicine was doing what it was supposed to.because I was too busy cradling my cracked feeling skull and drinkn insane amounts of water. [Post ID 3209]</p>
        <p>Another frequent lemma related to effectiveness in the n-grams is “feel,” which has been used by post writers in many ways. As a verb, it was used 22,767 times in the corpus. Splitting the occurrences into grammatical categories, as shown in <xref ref-type="table" rid="table4">Table 4</xref>, highlights the categories, some of the most frequent examples of each phrase from the corpus, and the number of occurrences for each category. A visual representation of the most frequent adjectives and objects associated with the verb “feel” is shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>, while <xref rid="figure7" ref-type="fig">Figure 7</xref> displays the most frequent collocates. The size of each circle represents the frequency of the collocate. Of note, “good” is the most prominent adjective collocate of “feel,” supporting the hypothesis that modafinil is perceived as effective by many of the posters. The full list of collocates of “feel,” together with their frequencies in the corpus, is available in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>.</p>
        <p>Feeling normal was identified as being an important outcome for some posters in the earlier study [<xref ref-type="bibr" rid="ref21">21</xref>]. <xref ref-type="boxed-text" rid="box3">Textbox 3</xref> presents examples of n-gram concordances for “makes me feel,” filtered by “normal.”</p>
        <table-wrap position="float" id="table4">
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="470"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Grammatical categories</td>
                <td>Examples</td>
                <td>Frequency (n=68,559), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>pronominal subjects of “feel”</td>
                <td>I feel, you feel, made me feel, it feels</td>
                <td>12,026 (17.54)</td>
              </tr>
              <tr valign="top">
                <td>modifiers of “feel”</td>
                <td>Don’t feel, I still feel, I just feel, really feel</td>
                <td>6842 (9.98)</td>
              </tr>
              <tr valign="top">
                <td>adjectives after “feel”</td>
                <td>feel better, feel tired, feel worse, feel great, feel sleepy, feel normal</td>
                <td>5342 (7.92)</td>
              </tr>
              <tr valign="top">
                <td>objects of “feel”</td>
                <td>feel the effects, feel a bit, felt nothing</td>
                <td>4354 (6.61)</td>
              </tr>
              <tr valign="top">
                <td>prepositional phrases associated with “feel”</td>
                <td>feel like, feel in, feel on, feel though</td>
                <td>2163 (3.15)</td>
              </tr>
              <tr valign="top">
                <td>subjects of “feel”</td>
                <td>I feel, my body feels, I don’t feel</td>
                <td>2032 (2.96)</td>
              </tr>
              <tr valign="top">
                <td>pronominal objects of “feel”</td>
                <td>feel it, you feel you, feel myself</td>
                <td>689 (1)</td>
              </tr>
              <tr valign="top">
                <td>complements of “feel”</td>
                <td>feel a lot better, felt it more, felt a bit weird</td>
                <td>289 (0.42)</td>
              </tr>
              <tr valign="top">
                <td>“wh-” words following “feel”</td>
                <td>feel when, feel what, I feel that, feel how, feel normal which</td>
                <td>179 (0.26)</td>
              </tr>
              <tr valign="top">
                <td>“feel” and/or</td>
                <td>sleep and feel, yawning and feeling</td>
                <td>150 (0.22)</td>
              </tr>
              <tr valign="top">
                <td>“-ing” objects of “feel”</td>
                <td>felt taking, felt amazing</td>
                <td>81 (0.12)</td>
              </tr>
              <tr valign="top">
                <td>particles after “feel”</td>
                <td>feel up to it, feeling down</td>
                <td>74 (0.11)</td>
              </tr>
              <tr valign="top">
                <td>infinitive objects of “feel”</td>
                <td>it feels to be</td>
                <td>37 (0.05)</td>
              </tr>
              <tr valign="top">
                <td>particles after “feel” with object</td>
                <td>feel hyped up, to feel out</td>
                <td>19 (0.03)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Most frequent adjectives and objects of “feel.”</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e54321_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Word sketch of the verb “feel.”</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e54321_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <boxed-text id="box3" position="float">
          <title>Concordance of “makes me feel” with “normal.”</title>
          <p>
            <bold>Examples of n-gram concordances for “makes me feel,” filtered by “normal”</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>I have never noticed excessive energy or anything out of the ordinary; it just makes/make me/me feel/feel like a normal person would.</p>
            </list-item>
            <list-item>
              <p>Taking the whole thing almost makes/make me/me feel/feel normal for a while.</p>
            </list-item>
            <list-item>
              <p>Anything that makes me less sleepy makes/make me/me feel/feel more “normal” (i.e., less tired), and not high (course I am not shooting it in my arm or anything).</p>
            </list-item>
            <list-item>
              <p>While Modafinil *feels* like a some sort of drug-induced happiness, Zoloft actually makes/make me/me feel/feel naturally normal and happy.</p>
            </list-item>
            <list-item>
              <p>Cheers. :) I am on Modafinil which makes/make me/me feel/feel normal most of the time. @Nicole – I’m showing my age, but as a student it was ProPlus every time for me!</p>
            </list-item>
            <list-item>
              <p>It just makes/make me/me feel/feel closer to normal.</p>
            </list-item>
            <list-item>
              <p>At first I did feel speedy but now it just makes/make me/me feel/feel normal (ish)!!</p>
            </list-item>
            <list-item>
              <p>Doesn’t jack me up or give me jitters - just makes/make me/me feel/feel as “normal people normal” as I can imagine.</p>
            </list-item>
            <list-item>
              <p>My epileptologist has just put me on nuvigil for sleepiness and it really helps, there is only a day here and there it doesn’t but it’s awesome now most of the time I have the energy that my family has (2 kids) doesn’t make me hyper just honestly makes/make me/me feel/feel more normal.</p>
            </list-item>
            <list-item>
              <p>I take Nuvigil, and, unlike stimulants, it just makes/make me/me feel/feel normal without the waves of crippling exhaustion or a crash at the end of the day.</p>
            </list-item>
            <list-item>
              <p>Nuvigil makes/make me/me feel/feel like a normal person again and without it, my quality of life is severely decreased.</p>
            </list-item>
            <list-item>
              <p>I love nuvigil and it makes/make me/me feel/feel “normal” and have a “normal” life but somedays I feel like I could use another pill and if its *safe* to take it twice a day then that may help me ALOT!!</p>
            </list-item>
            <list-item>
              <p>I have read posts where people talk about feeling revved up from it but for me it just makes/make me/me feel/feel normal.</p>
            </list-item>
            <list-item>
              <p>The provigil makes/make me/me feel/feel normal.</p>
            </list-item>
            <list-item>
              <p>It just makes/make me/me feel/feel normal which is perfect...no jitters.</p>
            </list-item>
            <list-item>
              <p>It makes/make me/me feel/feel normal.</p>
            </list-item>
            <list-item>
              <p>It makes/make me/me feel/feel pretty normal like I used too.</p>
            </list-item>
            <list-item>
              <p>I usually take it around noon at work during the week and it makes/make me/me feel/feel normal, and I can get through the rest of the day.</p>
            </list-item>
            <list-item>
              <p>It makes/make me/me feel/feel normal.</p>
            </list-item>
            <list-item>
              <p>I am taking 200mg an hour before work and it makes/make me/me feel/feel normal.I try not to take it every day, but it definitely helps...makes/make me/me feel/feel normal almost.</p>
            </list-item>
            <list-item>
              <p>I’ve now been feeling like it makes/make me/me feel/feel more “normal” (normal energy &#38; focus) for a few hours past my dose (8am and 2pm) and the other times are like a complete drop in energy, not even normal tired....just SO exhausted.</p>
            </list-item>
            <list-item>
              <p>(It wasn’t my first choice.) The only thing that makes/make me/me feel/feel close to normal is use of stimulants such as Nuvigil, but those give me serious insomnia.</p>
            </list-item>
            <list-item>
              <p>I hate that a pill/pills makes/make me/me feel/feel normal.</p>
            </list-item>
            <list-item>
              <p>It doesn’ make me feel buzzed or jittery, it just makes/make me/me feel/feel “normal.”</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Comparison With Existing Trial Evidence</title>
        <p>The effectiveness of modafinil suggested by this study contrasts with the existing RCT and systematic review evidence that is used to determine treatment pathway options for clinicians [<xref ref-type="bibr" rid="ref28">28</xref>]. Rather than searching for every review or RCT of modafinil, we used Cochrane reviews as a comparison. Cochrane reviews critically appraise individual trials, are recognized as providing high-quality assessment and evidence synthesis, and are also used to contribute to the development of clinical guidelines [<xref ref-type="bibr" rid="ref75">75</xref>]. As of May 2021, a search of the Cochrane Library [<xref ref-type="bibr" rid="ref76">76</xref>] showed that there were 16 published Cochrane reviews for various conditions that included the term modafinil in the title, abstract, or keywords. To compare the findings, we extracted the authors’ evidence summaries, the quality assessments of the evidence, and suggestions for addressing the remaining uncertainties relevant to this project (<xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>). All reviews were inconclusive, with either insufficient [<xref ref-type="bibr" rid="ref77">77</xref>-<xref ref-type="bibr" rid="ref85">85</xref>] or low-quality [<xref ref-type="bibr" rid="ref86">86</xref>-<xref ref-type="bibr" rid="ref92">92</xref>] evidence of effectiveness. 
One of the main findings of this study was that although modafinil is only currently licensed by the National Institute for Health and Care Excellence for a single condition within the United Kingdom, posters were finding it effective for a wide range of conditions, including central disorders of hypersomnolence, multiple sclerosis, attention-deficit disorder and attention-deficit/hyperactivity disorder, social anxiety, depression, sleep-related breathing disorders, general fatigue, myalgic encephalomyelitis and chronic fatigue syndrome, and fibromyalgia (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Other conditions for which modafinil was used included cancer fatigue, traumatic brain injury, diabetes, epilepsy, autoimmune conditions, pain, irritable bowel syndrome, hepatitis C, and poststroke fatigue. Multimorbidity was a regular feature.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Although a range of positive and negative experiences were reported, our analysis indicates that posters found modafinil effective for their symptoms with similar levels of effectiveness found across all methods. Similar themes were identified by both qualitative and computational analyses. Difficulties in obtaining a prescription or acquiring modafinil were common. All topic-modeling methods returned topics containing words that clearly related to and could be mapped to the themes and subcodes from the earlier qualitative study [<xref ref-type="bibr" rid="ref21">21</xref>]. Linguistic analysis identified expressions of causal belief.</p>
        <p>The overall methodology of the study was designed so that it can be applied to other health-related research questions that use unstructured data. The principles underlying the methods used in this study have shown that they can be used inductively on large volumes of unstructured text to extract the themes, sentiment, and expressions of perceived causality.</p>
        <p>As an inductive and iterative method, topic modeling shows potential for scaling up qualitative analysis [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref93">93</xref>] when working with large volumes of data. The requirement of both the LDA and NMF methods for a defined number of topics to be determined before running the models is problematic. Previous comparisons of findings from both manual coders and NMF topic modeling found that neither group could agree on the ideal number of topics [<xref ref-type="bibr" rid="ref61">61</xref>]. Using the Top2Vec method had the advantage that it did not require a predetermined number of topics or themes to be specified. The Top2Vec embedding-based method was more effective in eliciting topics that mapped to those previously identified through qualitative analysis [<xref ref-type="bibr" rid="ref21">21</xref>]. A possible disadvantage of this model is that, depending on the dataset, it may return too many topics [<xref ref-type="bibr" rid="ref94">94</xref>], but this can be mitigated in a later version of the model through the use of hierarchical topic reduction [<xref ref-type="bibr" rid="ref64">64</xref>].</p>
        <p>Previous studies have commented on how lexicon-based tools trained on general language do not perform as well on health-related text [<xref ref-type="bibr" rid="ref3">3</xref>]. Although lexicon-based sentiment analysis can provide an accurate assessment of text that contains words that express a strong positive or negative sentiment, posts that do not contain many of these predefined words are harder to evaluate. One of the features of the informal nature of SGOPE data is that the writers assume that readers can readily infer the affective reaction they are describing. Descriptive phrases such as “I could go back to work” or “It gave me a headache” suggest the effect of the event but would be viewed as neutral statements by most sentiment analysis models. Developing lexicons that are more relevant to health outcomes would improve and refine the results.</p>
        <p>The inclusion of linguistic analysis added a depth of understanding to the findings that would not have been possible with a pure NLP approach [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. The reported rapid onset of the effect of modafinil, whether positive or negative, together with the temporal sequencing, allowed the identification of text indicating perceived causality.</p>
        <p>Unsupervised methods align more with the inductive approach of qualitative studies and are shown to be effective for exploring SGOPE data. Although topic modeling has not yet been widely used within health research, previous studies have shown how it can be used to generate findings in a similar fashion to grounded theory [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Both topic modeling and the extraction of keywords, key terms, and key n-grams identify what is being spoken about but not how the word or phrase is used in context. Combining NLP with corpus linguistics draws on the strengths of both disciplines [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>] and allows the researcher to identify the content that is most relevant to the research question [<xref ref-type="bibr" rid="ref95">95</xref>].</p>
        <p>This research could be extended in a variety of ways that could be used to improve health outcomes. Extending the case study approach, these could include extracting features such as dosage detail and treatment duration, examining more granular topics, further refining the lexicons used for sentiment analysis, and conducting tense analysis of POS tags of modafinil or other interventions. Combining NLP with linguistics on large quantities of unstructured data could be a valuable source both to identify “off-label” indications and obtain a deeper understanding of the outcomes that patients and their families prioritize and how they are managing their conditions. In terms of methodological development, these methods could also be applied to many different types of unstructured text sources, such as qualitative interview transcripts or the free-text sections of clinical notes.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>The use of unsupervised methods allows for an inductive approach to analysis, and the comparison of findings from multiple methods with those from the exploratory dataset is a strength of this study. SGOPE data analysis relies on the poster’s self-description of their condition, which may include self-diagnosis rather than a clinician’s assessment. The reporting of symptoms and outcomes may not be as accurate or complete as it could be, although this limitation could apply to any form of self-reported data, whether collected in a trial, clinical encounter, or on the web. Self-reported data, particularly regarding hard-to-measure factors such as fatigue and cognition, are subjective but generally reflect the normative value of the patient. The natural, nonclinical language used in informal texts may contain valuable, unexplored, or overlooked information relevant to clinical or research purposes [<xref ref-type="bibr" rid="ref96">96</xref>], but it can also contain spelling or grammatical errors and inappropriate slang or colloquialisms that pose challenges for NLP methods [<xref ref-type="bibr" rid="ref97">97</xref>]. Keyword comparison with a reference corpus was found to be effective in identifying such terms and common misspellings.</p>
        <p>SGOPE data have several known strengths and limitations [<xref ref-type="bibr" rid="ref98">98</xref>-<xref ref-type="bibr" rid="ref100">100</xref>] as a single data source. Using multiple data sites enhanced the representativeness and validity of the sample and reduced the potential for demographic bias and emotional contagion [<xref ref-type="bibr" rid="ref18">18</xref>], while mitigating the impact of spam or nongenuine posts through the cleaning process. We do recognize the limitation of only including posts written in English. Although social media use is widespread, those who create posts represent a self-selected subset of users, with only 10% estimated to be active posters, while 90% read other users’ posts without contributing their own comments [<xref ref-type="bibr" rid="ref101">101</xref>].</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The study demonstrated the value of combining NLP and linguistic techniques for analyzing large quantities of unstructured text that can then be used as evidence of improved patient outcomes. In contrast to the current systematic review–based evidence, posters with a wide range of conditions found modafinil effective. The methods we used successfully identified the entities and topics contained in posts. The perceived experiences of causality and effectiveness were identified using 2 different methods. Our study indicates that this NLP- and linguistics-based approach can be used to look beyond the literal meaning of the words in posts, gaining an understanding of how posters assess the effectiveness of a health care intervention and the outcomes they value.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Post frequency across condition-specific sites derived from parsing of data source URLs.</p>
        <media xlink:href="jmir_v26i1e54321_app1.png" xlink:title="PNG File , 56 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Latent Dirichlet allocation: 8 topics and 50 iterations.</p>
        <media xlink:href="jmir_v26i1e54321_app2.png" xlink:title="PNG File , 57 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Latent Dirichlet allocation: 27 topics and 200 iterations.</p>
        <media xlink:href="jmir_v26i1e54321_app3.png" xlink:title="PNG File , 69 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Latent Dirichlet allocation: 27 topics and 1000 iterations.</p>
        <media xlink:href="jmir_v26i1e54321_app4.png" xlink:title="PNG File , 68 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Comparison of the top ten words of the sklearn latent Dirichlet allocation and nonnegative matrix factorization topics (n=27).</p>
        <media xlink:href="jmir_v26i1e54321_app5.docx" xlink:title="DOCX File , 23 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Collocates and frequency of “feel”.</p>
        <media xlink:href="jmir_v26i1e54321_app6.docx" xlink:title="DOCX File , 28 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Cochrane Library reviews including modafinil: May 2021.</p>
        <media xlink:href="jmir_v26i1e54321_app7.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 79 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NMF</term>
          <def>
            <p>nonnegative matrix factorization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">POS</term>
          <def>
            <p>part-of-speech</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">RCT</term>
          <def>
            <p>randomized controlled trial</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RWD</term>
          <def>
            <p>real-world data</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SGOPE</term>
          <def>
            <p>spontaneously generated online patient experience</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">VADER</term>
          <def>
            <p>Valence Aware Dictionary and Sentiment Reasoner</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The code generated and analyzed during this study is available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gohil</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vuik</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Darzi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis of health care tweets: review of the methods used</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2018</year>
          <month>04</month>
          <day>23</day>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>e43</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2018/2/e43/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.5789</pub-id>
          <pub-id pub-id-type="medline">29685871</pub-id>
          <pub-id pub-id-type="pii">v4i2e43</pub-id>
          <pub-id pub-id-type="pmcid">PMC5938573</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Edo-Osagie</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>De La Iglesia</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lake</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Edeghere</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>A scoping review of the use of Twitter for public health research</article-title>
          <source>Comput Biol Med</source>
          <year>2020</year>
          <month>07</month>
          <volume>122</volume>
          <fpage>103770</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32502758"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2020.103770</pub-id>
          <pub-id pub-id-type="medline">32502758</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(20)30142-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7229729</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zunic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Corcoran</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis in health and well-being: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>01</month>
          <day>28</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>e16023</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/1/e16023/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16023</pub-id>
          <pub-id pub-id-type="medline">32012057</pub-id>
          <pub-id pub-id-type="pii">v8i1e16023</pub-id>
          <pub-id pub-id-type="pmcid">PMC7013658</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walsh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dwumfour</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cave</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Griffiths</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Spontaneously generated online patient experience data - how and why is it being used in health research: an umbrella scoping review</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2022</year>
          <month>05</month>
          <day>14</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>139</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-022-01610-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-022-01610-z</pub-id>
          <pub-id pub-id-type="medline">35562661</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-022-01610-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC9106384</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Plasek</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Montecalvo</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing and its implications for the future of medication safety: a narrative review of recent advances and challenges</article-title>
          <source>Pharmacotherapy</source>
          <year>2018</year>
          <month>08</month>
          <volume>38</volume>
          <issue>8</issue>
          <fpage>822</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ovidsp.ovid.com/ovidweb.cgi?T=JS&#38;CSC=Y&#38;NEWS=N&#38;PAGE=fulltext&#38;D=emexb&#38;AN=623215708"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/phar.2151</pub-id>
          <pub-id pub-id-type="medline">29884988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lafferty</surname>
              <given-names>NT</given-names>
            </name>
            <name name-style="western">
              <surname>Manca</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Perspectives on social media in and as research: a synthetic review</article-title>
          <source>Int Rev Psychiatry</source>
          <year>2015</year>
          <month>04</month>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>85</fpage>
          <lpage>96</lpage>
          <pub-id pub-id-type="doi">10.3109/09540261.2015.1009419</pub-id>
          <pub-id pub-id-type="medline">25742363</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Foufi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Timakum</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gaudet-Blavignac</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lovis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Mining of textual health information from Reddit: analysis of chronic diseases with extracted entities and their relations</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>06</month>
          <day>13</day>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>e12876</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/6/e12876/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12876</pub-id>
          <pub-id pub-id-type="medline">31199327</pub-id>
          <pub-id pub-id-type="pii">v21i6e12876</pub-id>
          <pub-id pub-id-type="pmcid">PMC6595941</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suresh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Patient-generated health data can provide value in clinical care, research settings</article-title>
          <source>American Academy of Pediatrics News</source>
          <year>2020</year>
          <month>07</month>
          <day>01</day>
          <access-date>2021-05-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aappublications.org/news/2020/07/01/hit070120">https://www.aappublications.org/news/2020/07/01/hit070120</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vilar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Detection of drug-drug interactions through data mining studies using clinical sources, scientific literature and social media</article-title>
          <source>Brief Bioinform</source>
          <year>2018</year>
          <month>09</month>
          <day>28</day>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>863</fpage>
          <lpage>77</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28334070"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbx010</pub-id>
          <pub-id pub-id-type="medline">28334070</pub-id>
          <pub-id pub-id-type="pii">3002852</pub-id>
          <pub-id pub-id-type="pmcid">PMC6454455</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Su</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Deep learning in mental health outcome research: a scoping review</article-title>
          <source>Transl Psychiatry</source>
          <year>2020</year>
          <month>04</month>
          <day>22</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>116</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41398-020-0780-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41398-020-0780-3</pub-id>
          <pub-id pub-id-type="medline">32532967</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41398-020-0780-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7293215</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abbe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Grouin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zweigenbaum</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Falissard</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Text mining applications in psychiatry: a systematic literature review</article-title>
          <source>Int J Methods Psychiatr Res</source>
          <year>2016</year>
          <month>06</month>
          <volume>25</volume>
          <issue>2</issue>
          <fpage>86</fpage>
          <lpage>100</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26184780"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/mpr.1481</pub-id>
          <pub-id pub-id-type="medline">26184780</pub-id>
          <pub-id pub-id-type="pmcid">PMC6877250</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Aspiring to unintended consequences of natural language processing: a review of recent developments in clinical and consumer-generated text processing</article-title>
          <source>Yearb Med Inform</source>
          <year>2016</year>
          <month>11</month>
          <day>10</day>
          <issue>1</issue>
          <fpage>224</fpage>
          <lpage>33</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.15265/IY-2016-017"/>
          </comment>
          <pub-id pub-id-type="doi">10.15265/IY-2016-017</pub-id>
          <pub-id pub-id-type="medline">27830255</pub-id>
          <pub-id pub-id-type="pii">me2016-017</pub-id>
          <pub-id pub-id-type="pmcid">PMC5171557</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Döbrössy</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Girasek</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Susánszky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Koncz</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Győrffy</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Bognár</surname>
              <given-names>VK</given-names>
            </name>
          </person-group>
          <article-title>"Clicks, likes, shares and comments" a systematic review of breast cancer screening discourse in social media</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <month>04</month>
          <day>15</day>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>e0231422</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0231422"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0231422</pub-id>
          <pub-id pub-id-type="medline">32294139</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-01314</pub-id>
          <pub-id pub-id-type="pmcid">PMC7159232</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marsch</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Hancock</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Scaling up research on drug abuse and addiction through social media big data</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>10</month>
          <day>31</day>
          <volume>19</volume>
          <issue>10</issue>
          <fpage>e353</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2017/10/e353/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.6426</pub-id>
          <pub-id pub-id-type="medline">29089287</pub-id>
          <pub-id pub-id-type="pii">v19i10e353</pub-id>
          <pub-id pub-id-type="pmcid">PMC5686417</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>Number of social media users worldwide from 2017 to 2028</article-title>
          <source>Statista</source>
          <access-date>2024-11-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.statista.com/statistics/278414/number-of-worldwide-social-network-users/">https://www.statista.com/statistics/278414/number-of-worldwide-social-network-users/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <article-title>Social media fact sheet</article-title>
          <source>Pew Research Center</source>
          <access-date>2021-08-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/fact-sheet/social-media/">https://www.pewresearch.org/internet/fact-sheet/social-media/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bour</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ahne</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schmitz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Perchoux</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dessenne</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fagherazzi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The use of social media for health research purposes: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>05</month>
          <day>27</day>
          <volume>23</volume>
          <issue>5</issue>
          <fpage>e25736</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/5/e25736/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25736</pub-id>
          <pub-id pub-id-type="medline">34042593</pub-id>
          <pub-id pub-id-type="pii">v23i5e25736</pub-id>
          <pub-id pub-id-type="pmcid">PMC8193478</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cesare</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
          </person-group>
          <article-title>Understanding demographic bias and representation in social media health data</article-title>
          <source>Proceedings of the Companion Publication of the 10th ACM Conference on Web Science</source>
          <year>2019</year>
          <conf-name>WebSci '19</conf-name>
          <conf-date>June 30-July 3, 2019</conf-date>
          <conf-loc>Boston, MA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3328413.3328415</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Loke</surname>
              <given-names>YK</given-names>
            </name>
          </person-group>
          <article-title>Systematic review on the prevalence, frequency and comparative value of adverse events data in social media</article-title>
          <source>Br J Clin Pharmacol</source>
          <year>2015</year>
          <month>10</month>
          <volume>80</volume>
          <issue>4</issue>
          <fpage>878</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26271492"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/bcp.12746</pub-id>
          <pub-id pub-id-type="medline">26271492</pub-id>
          <pub-id pub-id-type="pmcid">PMC4594731</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frost</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Okun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vaughan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Heywood</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wicks</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Patient-reported outcomes as a source of evidence in off-label prescribing: analysis of data from PatientsLikeMe</article-title>
          <source>J Med Internet Res</source>
          <year>2011</year>
          <month>01</month>
          <day>21</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>e6</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2011/1/e6/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1643</pub-id>
          <pub-id pub-id-type="medline">21252034</pub-id>
          <pub-id pub-id-type="pii">v13i1e6</pub-id>
          <pub-id pub-id-type="pmcid">PMC3221356</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walsh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cave</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Griffiths</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Spontaneously generated online patient experience of modafinil: a qualitative and NLP analysis</article-title>
          <source>Front Digit Health</source>
          <year>2021</year>
          <month>02</month>
          <day>17</day>
          <volume>3</volume>
          <fpage>598431</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34713085"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fdgth.2021.598431</pub-id>
          <pub-id pub-id-type="medline">34713085</pub-id>
          <pub-id pub-id-type="pmcid">PMC8521895</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greenhalgh</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <source>Cultural Contexts of Health: The Use of Narrative Research in the Health Sector</source>
          <year>2016</year>
          <publisher-loc>Copenhagen, Denmark</publisher-loc>
          <publisher-name>WHO Regional Office for Europe</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Drewniak</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Glässel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hodel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Biller-Andorno</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Risks and benefits of web-based patient narratives: systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>03</month>
          <day>26</day>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>e15772</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/3/e15772/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/15772</pub-id>
          <pub-id pub-id-type="medline">32213468</pub-id>
          <pub-id pub-id-type="pii">v22i3e15772</pub-id>
          <pub-id pub-id-type="pmcid">PMC7146251</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McKenna</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Myers</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Newman</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Social media in qualitative research: challenges and recommendations</article-title>
          <source>Inf Organ</source>
          <year>2017</year>
          <month>06</month>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>87</fpage>
          <lpage>99</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1471772717300957"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.infoandorg.2017.03.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sackett</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenberg</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Haynes</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>WS</given-names>
            </name>
          </person-group>
          <article-title>Evidence based medicine: what it is and what it isn't</article-title>
          <source>BMJ</source>
          <year>1996</year>
          <month>01</month>
          <day>13</day>
          <volume>312</volume>
          <issue>7023</issue>
          <fpage>71</fpage>
          <lpage>2</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/8555924"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.312.7023.71</pub-id>
          <pub-id pub-id-type="medline">8555924</pub-id>
          <pub-id pub-id-type="pmcid">PMC2349778</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kones</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rumana</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Merino</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Exclusion of 'nonRCT evidence' in guidelines for chronic diseases - is it always appropriate? The Look AHEAD study</article-title>
          <source>Curr Med Res Opin</source>
          <year>2014</year>
          <month>10</month>
          <volume>30</volume>
          <issue>10</issue>
          <fpage>2009</fpage>
          <lpage>19</lpage>
          <pub-id pub-id-type="doi">10.1185/03007995.2014.925438</pub-id>
          <pub-id pub-id-type="medline">24841173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ogilvie</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bauman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gregg</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Panter</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Wareham</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Using natural experimental studies to guide public health action: turning the evidence-based medicine paradigm on its head</article-title>
          <source>J Epidemiol Community Health</source>
          <year>2020</year>
          <month>02</month>
          <volume>74</volume>
          <issue>2</issue>
          <fpage>203</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jech.bmj.com/lookup/pmidlookup?view=long&#38;pmid=31744848"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jech-2019-213085</pub-id>
          <pub-id pub-id-type="medline">31744848</pub-id>
          <pub-id pub-id-type="pii">jech-2019-213085</pub-id>
          <pub-id pub-id-type="pmcid">PMC6993029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schlegl</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ducournau</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ruof</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Different weights of the evidence-based medicine triad in regulatory, health technology assessment, and clinical decision making</article-title>
          <source>Pharmaceut Med</source>
          <year>2017</year>
          <volume>31</volume>
          <issue>4</issue>
          <fpage>213</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28824273"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40290-017-0197-3</pub-id>
          <pub-id pub-id-type="medline">28824273</pub-id>
          <pub-id pub-id-type="pii">197</pub-id>
          <pub-id pub-id-type="pmcid">PMC5539271</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <article-title>EBM+: integrating diverse evidence in evidence-based medicine</article-title>
          <source>University of Kent</source>
          <access-date>2024-11-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ebmplus.org/">https://ebmplus.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greenhalgh</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Will COVID-19 be evidence-based medicine's nemesis?</article-title>
          <source>PLoS Med</source>
          <year>2020</year>
          <month>06</month>
          <day>30</day>
          <volume>17</volume>
          <issue>6</issue>
          <fpage>e1003266</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1003266"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1003266</pub-id>
          <pub-id pub-id-type="medline">32603323</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-20-02946</pub-id>
          <pub-id pub-id-type="pmcid">PMC7326185</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anjum</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Copeland</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rocca</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Conclusion: CauseHealth recommendations for making causal evidence clinically relevant and informed</article-title>
          <source>Rethinking Causality, Complexity and Evidence for the Unique Patient</source>
          <year>2020</year>
          <month>06</month>
          <day>03</day>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <article-title>Real-world evidence</article-title>
          <source>U.S. Food and Drug Administration</source>
          <year>2024</year>
          <month>09</month>
          <day>19</day>
          <access-date>2024-11-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/science-research/science-and-research-special-topics/real-world-evidence">https://www.fda.gov/science-research/science-and-research-special-topics/real-world-evidence</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schilsky</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>Finding the evidence in real-world evidence: moving from data to information to knowledge</article-title>
          <source>J Am Coll Surg</source>
          <year>2017</year>
          <month>01</month>
          <volume>224</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jamcollsurg.2016.10.025</pub-id>
          <pub-id pub-id-type="medline">27989954</pub-id>
          <pub-id pub-id-type="pii">S1072-7515(16)31542-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miani</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Robin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Horvath</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Manville</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cave</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chataway</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Health and healthcare: assessing the real world data policy landscape in Europe</article-title>
          <source>Rand Health Q</source>
          <year>2014</year>
          <month>06</month>
          <day>01</day>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>15</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28083344"/>
          </comment>
          <pub-id pub-id-type="medline">28083344</pub-id>
          <pub-id pub-id-type="pmcid">PMC5052007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Averitt</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Perotte</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Translating evidence into practice: eligibility criteria fail to eliminate clinically significant differences between real-world and study populations</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <month>05</month>
          <day>11</day>
          <volume>3</volume>
          <fpage>67</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-0277-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0277-8</pub-id>
          <pub-id pub-id-type="medline">32411828</pub-id>
          <pub-id pub-id-type="pii">277</pub-id>
          <pub-id pub-id-type="pmcid">PMC7214444</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bender</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Gebru</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>McMillan-Major</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shmitchell</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>On the dangers of stochastic parrots: can language models be too big?</article-title>
          <source>Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency</source>
          <year>2021</year>
          <conf-name>FAccT '21</conf-name>
          <conf-date>March 3-10, 2021</conf-date>
          <conf-loc>Virtual Event</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3442188.3445922</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Strubell</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ganesh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Energy and policy considerations for deep learning in NLP</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on June 5, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1906.02243"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1906.02243</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Writing linguistic rules for natural language processing</article-title>
          <source>Medium</source>
          <year>2019</year>
          <month>11</month>
          <day>28</day>
          <access-date>2020-04-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://towardsdatascience.com/linguistic-rule-writing-for-nlp-ml-64d9af824ee8">https://towardsdatascience.com/linguistic-rule-writing-for-nlp-ml-64d9af824ee8</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bender</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Koller</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Climbing towards NLU: on meaning, form, and understanding in the age of data</article-title>
          <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2020</year>
          <conf-name>ACL 2020</conf-name>
          <conf-date>July 5-10, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.463</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdellaoui</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Foulquié</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Texier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Faviez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burgun</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schück</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Detection of cases of noncompliance to drug treatment in patient forum posts: topic model approach</article-title>
          <source>J Med Internet Res</source>
          <year>2018</year>
          <month>03</month>
          <day>14</day>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>e85</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2018/3/e85/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.9222</pub-id>
          <pub-id pub-id-type="medline">29540337</pub-id>
          <pub-id pub-id-type="pii">v20i3e85</pub-id>
          <pub-id pub-id-type="pmcid">PMC5874436</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maier</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Waldherr</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Miltner</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wiedemann</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Niekler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Keinert</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pfetsch</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Heyer</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Reber</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Häussler</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Schmid-Petri</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Adam</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Applying LDA topic modeling in communication research: toward a valid and reliable methodology</article-title>
          <source>Commun Methods Meas</source>
          <year>2018</year>
          <month>02</month>
          <day>16</day>
          <volume>12</volume>
          <issue>2-3</issue>
          <fpage>93</fpage>
          <lpage>118</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1080/19312458.2018.1430754"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/19312458.2018.1430754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baumer</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Mimno</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Guha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Quan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gay</surname>
              <given-names>GK</given-names>
            </name>
          </person-group>
          <article-title>Comparing grounded theory and topic modeling: extreme divergence or unlikely convergence?</article-title>
          <source>J Assoc Inf Sci Technol</source>
          <year>2017</year>
          <month>04</month>
          <day>28</day>
          <volume>68</volume>
          <issue>6</issue>
          <fpage>1397</fpage>
          <lpage>410</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://doi.wiley.com/10.1002/asi.23786"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/asi.23786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Button</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Patient triage by topic modeling of referral letters: feasibility study</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>11</month>
          <day>06</day>
          <volume>8</volume>
          <issue>11</issue>
          <fpage>e21252</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/11/e21252/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21252</pub-id>
          <pub-id pub-id-type="medline">33155985</pub-id>
          <pub-id pub-id-type="pii">v8i11e21252</pub-id>
          <pub-id pub-id-type="pmcid">PMC7679210</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greaves</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ramirez-Cano</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Millett</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Darzi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Donaldson</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Use of sentiment analysis for capturing patient experience from free-text comments posted online</article-title>
          <source>J Med Internet Res</source>
          <year>2013</year>
          <month>11</month>
          <day>01</day>
          <volume>15</volume>
          <issue>11</issue>
          <fpage>e239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2013/11/e239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.2721</pub-id>
          <pub-id pub-id-type="medline">24184993</pub-id>
          <pub-id pub-id-type="pii">v15i11e239</pub-id>
          <pub-id pub-id-type="pmcid">PMC3841376</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kerry</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Anjum</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Copeland</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rocca</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Causal dispositionalism and evidence based healthcare</article-title>
          <source>Rethinking Causality, Complexity and Evidence for the Unique Patient</source>
          <year>2020</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deaton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cartwright</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Understanding and misunderstanding randomized controlled trials</article-title>
          <source>Soc Sci Med</source>
          <year>2018</year>
          <month>08</month>
          <volume>210</volume>
          <fpage>2</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0277-9536(17)30735-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.socscimed.2017.12.005</pub-id>
          <pub-id pub-id-type="medline">29331519</pub-id>
          <pub-id pub-id-type="pii">S0277-9536(17)30735-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC6019115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anjum</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Anjum</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Copeland</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rocca</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Dispositions and the unique patient</article-title>
          <source>Rethinking Causality, Complexity and Evidence for the Unique Patient</source>
          <year>2020</year>
          <month>06</month>
          <day>03</day>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Living with complexity and big data</article-title>
          <source>Uppsala Monitoring Centre</source>
          <access-date>2018-11-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://view.publitas.com/uppsala-monitoring-centre/uppsala-reports-78/page/28">https://view.publitas.com/uppsala-monitoring-centre/uppsala-reports-78/page/28</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Neeleman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van de Koot</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The linguistic expression of causation</article-title>
          <source>The Theta System: Argument Structure at the Interface</source>
          <year>2012</year>
          <publisher-loc>Oxford, UK</publisher-loc>
          <publisher-name>Oxford University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Establishing causal claims in medicine</article-title>
          <source>Int Studies Philos Sci</source>
          <year>2019</year>
          <month>06</month>
          <day>27</day>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>33</fpage>
          <lpage>61</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1080/02698595.2019.1630927"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/02698595.2019.1630927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greenhalgh</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Snow</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rees</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Salisbury</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Six 'biases' against patients and carers in evidence-based medicine</article-title>
          <source>BMC Med</source>
          <year>2015</year>
          <month>09</month>
          <day>01</day>
          <volume>13</volume>
          <fpage>200</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-015-0437-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12916-015-0437-x</pub-id>
          <pub-id pub-id-type="medline">26324223</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12916-015-0437-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC4556220</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Social media: the voice of the patient</article-title>
          <source>Reuters Events</source>
          <year>2015</year>
          <month>7</month>
          <day>27</day>
          <access-date>2024-11-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.reutersevents.com/pharma/commercial/social-media-voice-patient#.VbX9zTjUk84.linkedin">https://www.reutersevents.com/pharma/commercial/social-media-voice-patient#.VbX9zTjUk84.linkedin</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Rossum</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Python reference manual</article-title>
          <source>Centrum Wiskunde &#38; Informatica</source>
          <year>1995</year>
          <access-date>2024-11-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ir.cwi.nl/pub/5008">https://ir.cwi.nl/pub/5008</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <article-title>jupyterlab</article-title>
          <source>GitHub</source>
          <access-date>2024-05-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/jupyterlab/jupyterlab">https://github.com/jupyterlab/jupyterlab</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Albalawi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yeap</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Benyoucef</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Using topic modeling methods for short-text data: a comparative analysis</article-title>
          <source>Front Artif Intell</source>
          <year>2020</year>
          <volume>3</volume>
          <fpage>42</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33733159"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/frai.2020.00042</pub-id>
          <pub-id pub-id-type="medline">33733159</pub-id>
          <pub-id pub-id-type="pmcid">PMC7861298</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khanbhai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Anyadi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Symons</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Flott</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Darzi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Applying natural language processing and machine learning techniques to patient experience feedback: a systematic review</article-title>
          <source>BMJ Health Care Inform</source>
          <year>2021</year>
          <month>03</month>
          <day>02</day>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>e100262</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://informatics.bmj.com/lookup/pmidlookup?view=long&#38;pmid=33653690"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjhci-2020-100262</pub-id>
          <pub-id pub-id-type="medline">33653690</pub-id>
          <pub-id pub-id-type="pii">bmjhci-2020-100262</pub-id>
          <pub-id pub-id-type="pmcid">PMC7929894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Experimental explorations on short text topic mining between LDA and NMF based schemes</article-title>
          <source>Knowl Based Syst</source>
          <year>2019</year>
          <month>01</month>
          <volume>163</volume>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.sciencedirect.com/science/article/pii/S0950705118304076"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.knosys.2018.08.011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rempel</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Carenini</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Janjua</surname>
              <given-names>NZ</given-names>
            </name>
          </person-group>
          <article-title>Tracking COVID-19 discourse on Twitter in North America: infodemiology study using topic modeling and aspect-based sentiment analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>02</month>
          <day>10</day>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>e25431</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/2/e25431/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25431</pub-id>
          <pub-id pub-id-type="medline">33497352</pub-id>
          <pub-id pub-id-type="pii">v23i2e25431</pub-id>
          <pub-id pub-id-type="pmcid">PMC7879725</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suri</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>NR</given-names>
            </name>
          </person-group>
          <article-title>Comparison between LDA and NMF for event-detection from large text stream data</article-title>
          <source>Proceedings of the 3rd International Conference on Computational Intelligence &#38; Communication Technology (CICT)</source>
          <year>2017</year>
          <conf-name>CICT 2017</conf-name>
          <conf-date>February 9-10, 2017</conf-date>
          <conf-loc>Ghaziabad, India</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ciact.2017.7977281</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birks</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Unsupervised identification of crime problems from police free-text data</article-title>
          <source>Crime Sci</source>
          <year>2020</year>
          <month>10</month>
          <day>07</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1186/s40163-020-00127-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s40163-020-00127-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bakharia</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>On the equivalence of inductive content analysis and topic modeling</article-title>
          <source>Proceedings of the First International Conference on Advances in Quantitative Ethnography</source>
          <year>2019</year>
          <conf-name>ICQE 2019</conf-name>
          <conf-date>October 20-22, 2019</conf-date>
          <conf-loc>Madison, WI</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://link.springer.com/chapter/10.1007/978-3-030-33232-7_25"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/978-3-030-33232-7_25</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rehurek</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>gensim: Python framework for vector space modelling</article-title>
          <source>Machine Learning Open Source Software</source>
          <year>2010</year>
          <month>9</month>
          <day>7</day>
          <access-date>2020-06-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mloss.org/revision/view/546/">https://mloss.org/revision/view/546/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>30</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Angelov</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Top2Vec: distributed representations of topics</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on August 19, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2008.09470"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="web">
          <article-title>A guide on word embeddings in NLP</article-title>
          <source>Turing</source>
          <access-date>2024-05-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.turing.com/kb/guide-on-word-embeddings-in-nlp">https://www.turing.com/kb/guide-on-word-embeddings-in-nlp</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sievert</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shirley</surname>
              <given-names>KE</given-names>
            </name>
          </person-group>
          <article-title>LDAvis: a method for visualizing and interpreting topics</article-title>
          <source>Proceedings of the Workshop on Interactive Language Learning, Visualization, and Interfaces</source>
          <year>2014</year>
          <conf-name>ACL 2014</conf-name>
          <conf-date>June 27, 2014</conf-date>
          <conf-loc>Baltimore, MD</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://nlp.stanford.edu/events/illvi2014/papers/sievert-illvi2014.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/w14-3110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loria</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>TextBlob: simplified text processing</article-title>
          <source>TextBlob</source>
          <access-date>2021-11-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://textblob.readthedocs.io/en/dev/index.html">https://textblob.readthedocs.io/en/dev/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>VADER: a parsimonious rule-based model for sentiment analysis of social media text</article-title>
          <source>Proc Int AAAI Conf Web Soc Media</source>
          <year>2014</year>
          <month>05</month>
          <day>16</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>216</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aaai.org/ocs/index.php/ICWSM/ICWSM14/paper/viewPaper/8109"/>
          </comment>
          <pub-id pub-id-type="doi">10.1609/icwsm.v8i1.14550</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bonta</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kumaresh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Janardhan</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>A comprehensive study on lexicon based approaches for sentiment analysis</article-title>
          <source>Asian J Comput Sci Technol</source>
          <year>2019</year>
          <volume>8</volume>
          <issue>S2</issue>
          <fpage>1</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/profile/Janardhan_n/publication/333602124_A_Comprehensive_Study_on_Lexicon_Based_Approaches_for_Sentiment_Analysis/links/5d1346ce299bf1547c7f931a/A-Comprehensive-Study-on-Lexicon-Based-Approaches-for-Sentiment-Analysis.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.51983/ajcst-2019.8.S2.2037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soma</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Comparing sentiment analysis tools</article-title>
          <source>Data Science for Journalism</source>
          <access-date>2021-06-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://investigate.ai/investigating-sentiment-analysis/comparing-sentiment-analysis-tools/">https://investigate.ai/investigating-sentiment-analysis/comparing-sentiment-analysis-tools/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caren</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Word lists and sentiment analysis</article-title>
          <source>Neal Caren</source>
          <year>2019</year>
          <month>5</month>
          <day>1</day>
          <access-date>2021-07-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://nealcaren.org/lessons/wordlists/">https://nealcaren.org/lessons/wordlists/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="web">
          <source>Sketch Engine</source>
          <access-date>2024-11-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sketchengine.eu/">https://www.sketchengine.eu/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="web">
          <article-title>enTenTen: corpus of the English web</article-title>
          <source>Sketch Engine</source>
          <access-date>2018-02-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sketchengine.eu/ententen-english-corpus/">https://www.sketchengine.eu/ententen-english-corpus/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="web">
          <article-title>What is an N-Gram?</article-title>
          <source>MathWorks</source>
          <access-date>2024-05-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://uk.mathworks.com/discovery/ngram.html">https://uk.mathworks.com/discovery/ngram.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alper</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Fedorowicz</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>van Zuuren</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Limitations in conduct and reporting of Cochrane reviews rarely inhibit the determination of the validity of evidence for clinical decision-making</article-title>
          <source>J Evid Based Med</source>
          <year>2015</year>
          <month>08</month>
          <day>21</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>154</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1111/jebm.12161</pub-id>
          <pub-id pub-id-type="medline">26107648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="web">
          <article-title>Cochrane Library homepage</article-title>
          <source>Cochrane Library</source>
          <access-date>2024-11-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cochranelibrary.com/">https://www.cochranelibrary.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruthirakuhan</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Herrmann</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lanctôt</surname>
              <given-names>KL</given-names>
            </name>
          </person-group>
          <article-title>Pharmacological interventions for apathy in Alzheimer's disease</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2018</year>
          <month>05</month>
          <day>04</day>
          <volume>5</volume>
          <issue>5</issue>
          <fpage>CD012197</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29727467"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD012197.pub2</pub-id>
          <pub-id pub-id-type="medline">29727467</pub-id>
          <pub-id pub-id-type="pmcid">PMC6494556</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castells</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cunill</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pérez-Mañá</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vidal</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Capellà</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Psychostimulant drugs for cocaine dependence</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2016</year>
          <month>09</month>
          <day>27</day>
          <volume>9</volume>
          <issue>9</issue>
          <fpage>CD007380</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27670244"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD007380.pub4</pub-id>
          <pub-id pub-id-type="medline">27670244</pub-id>
          <pub-id pub-id-type="pmcid">PMC6457633</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Day</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yust-Katz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cachia</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wefel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tremont Lukats</surname>
              <given-names>IW</given-names>
            </name>
            <name name-style="western">
              <surname>Bulbeck</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rooney</surname>
              <given-names>AG</given-names>
            </name>
          </person-group>
          <article-title>Interventions for the management of fatigue in adults with a primary brain tumour</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2022</year>
          <month>09</month>
          <day>12</day>
          <volume>9</volume>
          <issue>9</issue>
          <fpage>CD011376</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36094728"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD011376.pub3</pub-id>
          <pub-id pub-id-type="medline">36094728</pub-id>
          <pub-id pub-id-type="pmcid">PMC9466986</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dougall</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Poole</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Agrawal</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Pharmacotherapy for chronic cognitive impairment in traumatic brain injury</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2015</year>
          <month>12</month>
          <day>01</day>
          <volume>2015</volume>
          <issue>12</issue>
          <fpage>CD009221</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26624881"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD009221.pub2</pub-id>
          <pub-id pub-id-type="medline">26624881</pub-id>
          <pub-id pub-id-type="pmcid">PMC11092927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elbers</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Verhoef</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van Wegen</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Berendse</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Kwakkel</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Interventions for fatigue in Parkinson's disease</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2015</year>
          <month>10</month>
          <day>08</day>
          <volume>2015</volume>
          <issue>10</issue>
          <fpage>CD010925</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26447539"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD010925.pub2</pub-id>
          <pub-id pub-id-type="medline">26447539</pub-id>
          <pub-id pub-id-type="pmcid">PMC9240814</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mücke</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mochamat</surname>
            </name>
            <name name-style="western">
              <surname>Cuhls</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Peuckmann-Post</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Minton</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Radbruch</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Pharmacological treatments for fatigue associated with palliative care</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2015</year>
          <month>05</month>
          <day>30</day>
          <volume>2015</volume>
          <issue>5</issue>
          <fpage>CD006788</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26026155"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD006788.pub3</pub-id>
          <pub-id pub-id-type="medline">26026155</pub-id>
          <pub-id pub-id-type="pmcid">PMC6483317</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koopman</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Beelen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gilhus</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>de Visser</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nollet</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Treatment for postpolio syndrome</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2015</year>
          <month>05</month>
          <day>18</day>
          <volume>2015</volume>
          <issue>5</issue>
          <fpage>CD007818</fpage>
          <pub-id pub-id-type="doi">10.1002/14651858.CD007818.pub3</pub-id>
          <pub-id pub-id-type="medline">25984923</pub-id>
          <pub-id pub-id-type="pmcid">PMC11236427</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Day</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zienius</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gehring</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Grosshans</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Taphoorn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>PD</given-names>
            </name>
          </person-group>
          <article-title>Interventions for preventing and ameliorating cognitive deficits in adults treated with cranial irradiation</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2014</year>
          <month>12</month>
          <day>18</day>
          <volume>2014</volume>
          <issue>12</issue>
          <fpage>CD011335</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25519950"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD011335.pub2</pub-id>
          <pub-id pub-id-type="medline">25519950</pub-id>
          <pub-id pub-id-type="pmcid">PMC6457828</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pérez-Mañá</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Castells</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Torrens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Capellà</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Farre</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Efficacy of psychostimulant drugs for amphetamine abuse or dependence</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2013</year>
          <month>09</month>
          <day>02</day>
          <volume>2013</volume>
          <issue>9</issue>
          <fpage>CD009695</fpage>
          <pub-id pub-id-type="doi">10.1002/14651858.CD009695.pub2</pub-id>
          <pub-id pub-id-type="medline">23996457</pub-id>
          <pub-id pub-id-type="pmcid">PMC11521360</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ortiz-Orendain</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Covarrubias-Castillo</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Vazquez-Alvarez</surname>
              <given-names>AO</given-names>
            </name>
            <name name-style="western">
              <surname>Castiello-de Obeso</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Arias Quiñones</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Seegers</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Colunga-Lozano</surname>
              <given-names>LE</given-names>
            </name>
          </person-group>
          <article-title>Modafinil for people with schizophrenia or related disorders</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2019</year>
          <month>12</month>
          <day>12</day>
          <volume>12</volume>
          <issue>12</issue>
          <fpage>CD008661</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31828767"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD008661.pub2</pub-id>
          <pub-id pub-id-type="medline">31828767</pub-id>
          <pub-id pub-id-type="pmcid">PMC6906203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castells</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Blanco-Silvente</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cunill</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Amphetamines for attention deficit hyperactivity disorder (ADHD) in adults</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2018</year>
          <month>08</month>
          <day>09</day>
          <volume>8</volume>
          <issue>8</issue>
          <fpage>CD007813</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30091808"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD007813.pub3</pub-id>
          <pub-id pub-id-type="medline">30091808</pub-id>
          <pub-id pub-id-type="pmcid">PMC6513464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gibbons</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pagnini</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Friede</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>Treatment of fatigue in amyotrophic lateral sclerosis/motor neuron disease</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2018</year>
          <month>01</month>
          <day>02</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>CD011005</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29293261"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD011005.pub2</pub-id>
          <pub-id pub-id-type="medline">29293261</pub-id>
          <pub-id pub-id-type="pmcid">PMC6494184</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref89">
        <label>89</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liira</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Verbeek</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Driscoll</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Sallinen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Isotalo</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>Ruotsalainen</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Pharmacological interventions for sleepiness and sleep disturbances caused by shift work</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2014</year>
          <month>08</month>
          <day>12</day>
          <volume>2014</volume>
          <issue>8</issue>
          <fpage>CD009776</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25113164"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD009776.pub2</pub-id>
          <pub-id pub-id-type="medline">25113164</pub-id>
          <pub-id pub-id-type="pmcid">PMC10025070</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref90">
        <label>90</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ker</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Felix</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Blackhall</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Caffeine for the prevention of injuries and errors in shift workers</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2010</year>
          <month>05</month>
          <day>12</day>
          <volume>2010</volume>
          <issue>5</issue>
          <fpage>CD008508</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20464765"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD008508</pub-id>
          <pub-id pub-id-type="medline">20464765</pub-id>
          <pub-id pub-id-type="pmcid">PMC4160007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref91">
        <label>91</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Candy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tookman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Psychostimulants for depression</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2008</year>
          <month>04</month>
          <day>16</day>
          <issue>2</issue>
          <fpage>CD006722</fpage>
          <pub-id pub-id-type="doi">10.1002/14651858.CD006722.pub2</pub-id>
          <pub-id pub-id-type="medline">18425966</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref92">
        <label>92</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Annane</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>Psychostimulants for hypersomnia (excessive daytime sleepiness) in myotonic dystrophy</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2006</year>
          <month>07</month>
          <day>19</day>
          <volume>2006</volume>
          <issue>3</issue>
          <fpage>CD003218</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/16855999"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD003218.pub2</pub-id>
          <pub-id pub-id-type="medline">16855999</pub-id>
          <pub-id pub-id-type="pmcid">PMC9006877</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref93">
        <label>93</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Vaculik</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Khalil</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zektser</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Arnold</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Almario</surname>
              <given-names>CV</given-names>
            </name>
            <name name-style="western">
              <surname>Spiegel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Anger</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Using large-scale social media analytics to understand patient perspectives about urinary tract infections: thematic analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <month>01</month>
          <day>25</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>e26781</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/1/e26781/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26781</pub-id>
          <pub-id pub-id-type="medline">35076404</pub-id>
          <pub-id pub-id-type="pii">v24i1e26781</pub-id>
          <pub-id pub-id-type="pmcid">PMC8826307</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref94">
        <label>94</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Egger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A topic modeling comparison between LDA, NMF, Top2Vec, and BERTopic to demystify Twitter posts</article-title>
          <source>Front Sociol</source>
          <year>2022</year>
          <volume>7</volume>
          <fpage>886498</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35602001"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fsoc.2022.886498</pub-id>
          <pub-id pub-id-type="medline">35602001</pub-id>
          <pub-id pub-id-type="pmcid">PMC9120935</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref95">
        <label>95</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Isoaho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gritsenko</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mäkelä</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Topic modeling and text analysis for qualitative policy research</article-title>
          <source>Policy Stud J</source>
          <year>2019</year>
          <month>06</month>
          <day>19</day>
          <volume>49</volume>
          <issue>1</issue>
          <fpage>300</fpage>
          <lpage>324</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.1111/psj.12343"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/psj.12343</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref96">
        <label>96</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wall</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Murali</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Collecting and analyzing patient experiences of health care from social media</article-title>
          <source>JMIR Res Protoc</source>
          <year>2015</year>
          <month>07</month>
          <day>02</day>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>e78</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchprotocols.org/2015/3/e78/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/resprot.3433</pub-id>
          <pub-id pub-id-type="medline">26137885</pub-id>
          <pub-id pub-id-type="pii">v4i3e78</pub-id>
          <pub-id pub-id-type="pmcid">PMC4526973</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref97">
        <label>97</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dirkson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Verberne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kraaij</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Lexical normalization of user-generated medical text</article-title>
          <source>Proceedings of the Fourth Social Media Mining for Health Applications (#SMM4H) Workshop &#38; Shared Task</source>
          <year>2019</year>
          <conf-name>SMM4H@ACL 2019</conf-name>
          <conf-date>August 2, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1812.04265#page=5"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref98">
        <label>98</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dalmer</surname>
              <given-names>NK</given-names>
            </name>
          </person-group>
          <article-title>Questioning reliability assessments of health information on social media</article-title>
          <source>J Med Libr Assoc</source>
          <year>2017</year>
          <month>01</month>
          <day>17</day>
          <volume>105</volume>
          <issue>1</issue>
          <fpage>61</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28096748"/>
          </comment>
          <pub-id pub-id-type="doi">10.5195/jmla.2017.108</pub-id>
          <pub-id pub-id-type="medline">28096748</pub-id>
          <pub-id pub-id-type="pii">jmla-105-61</pub-id>
          <pub-id pub-id-type="pmcid">PMC5234445</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref99">
        <label>99</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>McKee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Torbica</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stuckler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Systematic literature review on the spread of health-related misinformation on social media</article-title>
          <source>Soc Sci Med</source>
          <year>2019</year>
          <month>11</month>
          <volume>240</volume>
          <fpage>112552</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0277-9536(19)30546-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.socscimed.2019.112552</pub-id>
          <pub-id pub-id-type="medline">31561111</pub-id>
          <pub-id pub-id-type="pii">S0277-9536(19)30546-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7117034</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref100">
        <label>100</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Staccini</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez-Luque</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Secondary use of recorded or self-expressed personal data: consumer health informatics and education in the era of social media and health apps</article-title>
          <source>Yearb Med Inform</source>
          <year>2017</year>
          <month>09</month>
          <day>11</day>
          <volume>26</volume>
          <issue>01</issue>
          <fpage>172</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.15265/iy-2017-037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref101">
        <label>101</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Mierlo</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The 1% rule in four digital health social networks: an observational study</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>02</month>
          <day>04</day>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>e33</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/2/e33/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.2966</pub-id>
          <pub-id pub-id-type="medline">24496109</pub-id>
          <pub-id pub-id-type="pii">v16i2e33</pub-id>
          <pub-id pub-id-type="pmcid">PMC3939180</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
