<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i5e20803</article-id>
      <article-id pub-id-type="pmid">33999001</article-id>
      <article-id pub-id-type="doi">10.2196/20803</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Tutorial</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Tutorial</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Determination of Patient Sentiment and Emotion in Ophthalmology: Infoveillance Tutorial on Web-Based Health Forum Discussions</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Benis</surname>
            <given-names>Arriel</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gore</surname>
            <given-names>Ross</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>Anne Xuan-Lan</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3999-946X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Trinh</surname>
            <given-names>Xuan-Vi</given-names>
          </name>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3899-5333</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Sophia Y</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0916-9403</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Albert Y</given-names>
          </name>
          <degrees>MD, PhD, FACS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Department of Ophthalmology</institution>
            <institution>Byers Eye Institute</institution>
            <institution>Stanford University</institution>
            <addr-line>2452 Watson Court</addr-line>
            <addr-line>Palo Alto, CA, 94303</addr-line>
            <country>United States</country>
            <phone>1 650 497 0758</phone>
            <email>awu1@stanford.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1360-8248</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Faculty of Medicine</institution>
        <institution>McGill University</institution>
        <addr-line>Montreal, QC</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer Science</institution>
        <institution>McGill University</institution>
        <addr-line>Montreal, QC</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Ophthalmology</institution>
        <institution>Byers Eye Institute</institution>
        <institution>Stanford University</institution>
        <addr-line>Palo Alto, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Albert Y Wu <email>awu1@stanford.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>17</day>
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>5</issue>
      <elocation-id>e20803</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>8</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>27</day>
          <month>8</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>16</day>
          <month>3</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Anne Xuan-Lan Nguyen, Xuan-Vi Trinh, Sophia Y Wang, Albert Y Wu. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 17.05.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/5/e20803" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Clinical data in social media are an underused source of information with great potential to allow for a deeper understanding of patient values, attitudes, and preferences.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This tutorial aims to describe a novel, robust, and modular method for the sentiment analysis and emotion detection of free text from web-based forums and the factors to consider during its application.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We mined the discussion and user information of all posts containing search terms related to a medical subspecialty (oculoplastics) from MedHelp, the largest web-based platform for patient health forums. We used data cleaning and processing tools to define the relevant subset of results and prepare them for sentiment analysis. We executed sentiment and emotion analyses by using IBM Watson Natural Language Understanding to generate sentiment and emotion scores for the posts and their associated keywords. The keywords were aggregated using natural language processing tools.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Overall, 39 oculoplastic-related search terms resulted in 46,381 eligible posts within 14,329 threads. Posts were written by 18,319 users (117 doctors; 18,202 patients) and included 201,611 associated keywords. Keywords that occurred ≥500 times in the corpus were used to identify the most prominent topics, including specific symptoms, medication, and complications. The sentiment and emotion scores of these keywords and eligible posts were analyzed to provide concrete examples of the potential of this methodology to allow for a better understanding of patients’ attitudes. The overall sentiment score reflects a positive, neutral, or negative sentiment, whereas the emotion scores (anger, disgust, fear, joy, and sadness) represent the likelihood of the presence of the emotion. In keyword grouping analyses, medical signs, symptoms, and diseases had the lowest overall sentiment scores (−0.598). Complications were highly associated with sadness (0.485). Forum posts mentioning body parts were related to sadness (0.416) and fear (0.321). Administration was the category with the highest anger score (0.146). The top 6 forum subgroups had an overall negative sentiment score; the most negative one was the <italic>Neurology</italic> forum, with a score of −0.438. The <italic>Undiagnosed Symptoms</italic> forum had the highest sadness score (0.448). The least likely fearful posts were those from the <italic>Eye Care</italic> forum, with a score of 0.260. The overall sentiment score was much more negative before the doctor replied. The anger, disgust, fear, and sadness emotion scores decreased in likelihood, whereas joy was slightly more likely to be expressed after doctors replied.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This report allows physicians and researchers to efficiently mine and perform sentiment analysis on social media to better understand patients’ perspectives and promote patient-centric care. Important factors to be considered during its application include evaluating the scope of the search; selecting search terms and understanding their linguistic usages; and establishing selection, filtering, and processing criteria for posts and keywords tailored to the desired results.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>sentiment analysis</kwd>
        <kwd>emotions analysis</kwd>
        <kwd>natural language processing</kwd>
        <kwd>online forums</kwd>
        <kwd>social media</kwd>
        <kwd>patient attitudes</kwd>
        <kwd>medicine</kwd>
        <kwd>infodemiology</kwd>
        <kwd>infoveillance</kwd>
        <kwd>digital health</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Understanding patient attitudes and expectations toward health care is an important component of promoting patient-centric care and patient satisfaction. However, studies have shown that physicians have difficulties in understanding patients’ health beliefs and concerns [<xref ref-type="bibr" rid="ref1">1</xref>]. Strategies to improve the understanding of patient attitudes have traditionally required the development of specialized survey instruments, which may nonetheless be limited in scope, or focus groups, which can be very time consuming and laborious [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
      <p>The internet has now become a rich additional source of information regarding patients’ attitudes and expectations toward health care. Recent decades have seen a rapid increase in internet engagement, with an estimated 5 billion people using mobile devices [<xref ref-type="bibr" rid="ref3">3</xref>], and more than half of the global population actively using the internet [<xref ref-type="bibr" rid="ref4">4</xref>]. In 2012, 72% of American internet users sought health information on the web [<xref ref-type="bibr" rid="ref5">5</xref>] and many also increasingly expressed their medical concerns on the web [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. These web-based communication outlets include social networks (eg, Facebook, Twitter, or Instagram), doctor review websites (eg, Healthgrades, Vitals, or RateMDs), and health web forums (eg, MedHelp, Health245, or Patient info). Analyzing people’s health-related queries and reports on the internet to better inform public health and public policy is an increasingly popular field known as infoveillance [<xref ref-type="bibr" rid="ref8">8</xref>]. Although Twitter is a common and popular platform based on which many infoveillance studies are conducted, its space-limited format contrasts with web-based health forums, which are a particularly rich resource for understanding patient attitudes toward medical issues by supporting patients in directly seeking medical advice, sharing their medical experiences, and discussing their symptoms at length [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      <p>Understanding unstructured clinical data on social media requires natural language processing (NLP), a well-established branch of artificial intelligence that has been applied in a variety of fields and has emerging applications in medicine [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Sentiment and emotion analyses, which are subbranches of NLP, can identify and quantify positive, neutral, and negative sentiments and can detect emotions such as anger, disgust, fear, joy, and sadness in free text [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. The data mining and sentiment analysis of social media, especially web-based medical discussion forums, can provide a fast and effective way to better understand patients’ attitudes, expectations, and experiences [<xref ref-type="bibr" rid="ref18">18</xref>], which can better guide patient-centric care [<xref ref-type="bibr" rid="ref20">20</xref>]. The literature shows that health care professionals can, with the sentiment analysis of web-based medical forums, discover new outlooks of patient issues and recurrent complications related to specific treatment uses and drugs [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>] and administrative burden and access to care [<xref ref-type="bibr" rid="ref23">23</xref>]. By analyzing forum posts, physicians can further understand patients’ attitudes and experiences and assess their needs and concerns, which can result in better patient-centric care [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
      <p>We examined all oculoplastics-related posts on MedHelp, which included questions from patients and replies written by patients and doctors. Oculoplastics is a subspecialty in ophthalmology that involves the eyelids, face, tear ducts, and orbit and is both highly specialized and interdisciplinary as a clinical domain, often at the intersection of ophthalmology, plastic surgery, dermatology, and otolaryngology. Our study illustrates the challenges of identifying and distinguishing text related to specialized medical subdomains, such as ophthalmology, in the context of patient-centric idiomatic language and of web-based discussion forum analysis, where the relevance of text must be filtered on multiple structural levels and patients’ posts must be distinguished from physicians’ posts. We provide all scripts and describe a detailed approach toward web-based patient forum sentiment analysis, which includes data collection; rigorous data processing, cleaning, and selection; and in-depth data analysis. This methodology allows for a variety of applications, notably the identification and analysis of the main topics related to the chosen field (eg, symptoms, complications, and medication) and their associated quantified sentiment (positive, neutral, or negative) and the likelihood of the presence of certain emotions (joy, anger, disgust, sadness, and fear). This methodology can also be used as a means to measure patient satisfaction and perspective by comparing patients’ sentiment and emotions before, during, and after their interaction with health care professionals. This paper aims to guide physicians and researchers to mine and perform sentiment analysis on web-based clinical data in a chosen field and highlights the challenges and approaches to consider in the process.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source and Study Population</title>
        <p>Founded in 1994, MedHelp is the world’s largest web-based health community [<xref ref-type="bibr" rid="ref25">25</xref>]. With more than 15 million visits per month, it allows users (patients and doctors) to discuss issues related to various health and wellness topics on a daily basis [<xref ref-type="bibr" rid="ref18">18</xref>]. Currently, this platform contains 299 official support communities, including a wide variety of well-established medical discussion forums. The main oculoplastic discussion forum is the <italic>Eye Care Community</italic>, which encourages patients to discuss eye-related issues. Another vision-related forum was the <italic>Ask a Doctor</italic>-<italic>Eye Care Forum</italic>, which benefited from a collaboration with ophthalmologists from the American Academy of Ophthalmology from 2007 to 2014 [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. In addition to these forums, MedHelp has more than 1000 user-made groups.</p>
        <p>Each community or group, also referred to as <italic>a forum</italic>, encompasses various discussion <italic>threads</italic>. Discussion threads comprise a question asked by a user (the initial <italic>post</italic>), followed by replies written by individual users, which are also considered <italic>posts</italic> [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
      </sec>
      <sec>
        <title>Approach to Data Extraction</title>
        <p>The approach to data extraction from MedHelp is summarized in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Discussion threads related to oculoplastic surgery were identified from MedHelp using a list of oculoplastics-relevant search terms created by consensus between 2 specialized ophthalmologists, AYW and SYW, and AXN (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Flowchart for the data extraction of discussion threads and posts on web-based medical forums. SQL: Structured Query Language.</p>
          </caption>
          <graphic xlink:href="jmir_v23i5e20803_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Each discussion thread was parsed using a Python script (Python Software Foundation, version 3.8.6) [<xref ref-type="bibr" rid="ref27">27</xref>] and the Python package <italic>Beautiful Soup</italic> [<xref ref-type="bibr" rid="ref28">28</xref>] to yield the full text of each post (including the initial question and all replies) and the relevant metadata, including the MedHelp user for each post and the forum that each thread belonged to.</p>
        <p>An initial review of the search results demonstrated that not all results appeared to be relevant, and it was noted that the details of the exact algorithm used by MedHelp’s proprietary search engine could not be known. Thus, we performed additional filtering of the search results to remove irrelevant discussion threads. Threads in animal forums, duplicate threads, and threads where the search terms were mentioned in purely idiomatic ways were removed.</p>
        <p>In addition, we noted that many threads were returned as search results because search terms appeared in different posts within the same thread; for example, the search term “double eye lid” could return a thread containing the use of “double,” “eye,” and “lid” in separate posts, which could result in many irrelevant posts.</p>
        <p>Therefore, to further filter the posts to include those that were most highly relevant to oculoplastics, we developed additional lists of related terms and text patterns and identified all the posts that contained exact matches to these patterns (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref>-<xref ref-type="supplementary-material" rid="app3">3</xref>) after lowercasing all the posts. Posts were deemed relevant and included for analyses if (1) they were in a thread whose title or initial question contained an exact pattern match (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) or (2) the post itself contained an exact pattern match to a very specific oculoplastics-related term (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). Posts that were not part of a relevant thread were subject to more stringent inclusion criteria because the original topic of the thread did not necessarily pertain to oculoplastics. This filtering algorithm ensures that the data set is relevant and tailored and is not influenced by the proprietary search algorithm of the platform.</p>
        <p>Patterns required for inclusion of posts allowed for some variability in human language; for example, the two patterns “%upper lid%eye” and “eye%upper lid%” (“%” denotes 0 or more of any character) match a subset of posts expressing one’s upper eyelid, such as “my eye hurts, and my upper lid...” and “my upper lid droops, and my eye keeps twitching,” without deeming posts containing solely “upper lid” as relevant, such as “the upper lid of my jar....” After excluding irrelevant posts, we extracted the username, user type (doctor or patient), self-reported age, sex, registration date to the MedHelp community, and user location from each user profile. All data were stored in an SQLite relational database [<xref ref-type="bibr" rid="ref29">29</xref>]. The scripts used to extract threads, posts, and users and the detailed instructions on how to use them can be found in our repository [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      </sec>
      <sec>
        <title>Approach to Natural Language Understanding Processing</title>
        <p>The approach to NLP and sentiment analysis is presented in <xref rid="figure2" ref-type="fig">Figure 2</xref> [<xref ref-type="bibr" rid="ref31">31</xref>]. We used IBM’s Watson Natural Language Understanding (NLU; IBM Cloud Natural Language Understanding V1, version 2019-07-12) [<xref ref-type="bibr" rid="ref32">32</xref>] to perform sentiment and emotion analyses on the free text of every included forum post. The Watson machine learning system reads and understands the semantics of free text by breaking down sentences structurally, grammatically, and contextually through various linguistic models and algorithms. The results that were returned included a sentiment score for the full document (ie, the full text of a single post) and for each keyword extracted by the IBM Watson algorithm and emotion scores for anger, sadness, joy, fear, and disgust at both the post and keyword levels. These keywords include important words, entities, and phrases from each post. Sentiment scores ranged from −1 to +1 on an arbitrary linear scale of intensity and were negative (less than 0), neutral (0), or positive (greater than 0). For each emotion, a score was given in the form of a percentage of likelihood, ranging from 0 to 1, where 0 represents the certain absence of the emotion in question and 1 represents the definite presence of the emotion.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Flowchart describing keyword processing and sentiment analysis.</p>
          </caption>
          <graphic xlink:href="jmir_v23i5e20803_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>NLU Keywords Processing</title>
        <p>Related keywords generated by the IBM Watson NLU program were processed using a Jupyter Notebook [<xref ref-type="bibr" rid="ref33">33</xref>] with Natural Language Toolkit (NLTK) [<xref ref-type="bibr" rid="ref34">34</xref>], NumPy [<xref ref-type="bibr" rid="ref35">35</xref>], and Pandas [<xref ref-type="bibr" rid="ref36">36</xref>] libraries. The following transformations were applied to each keyword: lowercasing, punctuation removal, stop word deletion (eg, prepositions and conjunctions), and lemmatization [<xref ref-type="bibr" rid="ref37">37</xref>] (morphological destructuring that allows words to be stripped down to their root word, eg, “oculoplastics” into “oculoplastic”).</p>
      </sec>
      <sec>
        <title>NLU Keywords Selection and Categorization</title>
        <p>Among the keywords with a frequency higher than 500, manual verification was performed to merge the keywords with the same semantic meaning. These keywords were then classified into various categories (groups and subgroups). For example, the “people” group encompasses multiple subgroups including the “eye care provider” subgroup, which in turn contains the fully processed keywords “ophthalmologist” and “optometrist.” However, keywords with a questionable relevancy to the clinical field and keywords with a general meaning (eg, “thing,” “thought,” and “name”) were excluded from the analysis.</p>
      </sec>
      <sec>
        <title>Sentiment Scores Statistical Analysis</title>
        <p>We used Python to aggregate and calculate the mean and standard deviation of each keyword’s associated sentiment and emotion scores (sentiment, sadness, fear, anger, joy, and disgust scores). Three example analyses were performed on the results. First, we computed summary statistics by keyword grouping to determine significant trends among the chosen clinical categories. Second, we analyzed the data by forum subgroups (eg, posts in the <italic>Eye Care</italic> forum vs posts in the <italic>Neurology</italic> forum). Third, we compared the sentiment associated with the posts written by the patient before a doctor replied with that of the patient’s posts written after a doctor replied.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Results From Data Extraction</title>
        <sec>
          <title>Threads Extraction and Filtering</title>
          <p>Searching the 300 forums (including ongoing communities, discontinued forums, and user-made groups) on MedHelp using 39 oculoplastics-related search terms resulted in 22,623 discussion threads (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The screening for irrelevant threads resulted in the exclusion of 6 duplicate threads, 330 threads found in animal-related forums, and 92 threads containing the search term used exclusively as an idiom. <xref ref-type="table" rid="table1">Table 1</xref> highlights threads containing the common idioms associated with the initial search term lists and excluded forums (<italic>Animal Health—General</italic>, <italic>Animal Lovers Group</italic>, <italic>Animal-Surgery</italic>, <italic>Birds</italic>, <italic>Cats</italic>, <italic>Dogs</italic>), as well as example text from the excluded threads and the associated number of threads deleted.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Examples of posts excluded because of idiomatic language or reference to animals.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="130"/>
              <col width="0"/>
              <col width="300"/>
              <col width="0"/>
              <col width="0"/>
              <col width="180"/>
              <col width="0"/>
              <col width="360"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Idiom or forum name</td>
                  <td colspan="2">Description</td>
                  <td colspan="3">Threads deleted, n (%)</td>
                  <td>Example text from excluded threads</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="6">
                    <bold>Idiom</bold>
                  </td>
                  <td colspan="3">
                    <break/>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>(1) Raise an <italic>eyebrow</italic><sup>a</sup>; (2) raise an <italic>eye brow</italic>; (3) raise <italic>eyebrows</italic></td>
                  <td colspan="2">This idiom is used to convey awe, consternation, or disbelief.</td>
                  <td colspan="3">(1) 51 (100);<break/>(2) 2 (100);<break/>(3) 18 (64)</td>
                  <td colspan="2">“I may be just freaking out but it does raise an <italic>eyebrow.</italic>”</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>(1) Bat an <italic>eyelid</italic>; (2) bat an <italic>eye lid</italic></td>
                  <td colspan="2">This idiom is used to show an emotional reaction.</td>
                  <td colspan="3">(1) 20 (100);<break/> (2) 1 (100)</td>
                  <td colspan="2">“And the doctor, like me, has seen so many she’s not going to bat an <italic>eyelid</italic>!”</td>
                </tr>
                <tr valign="top">
                  <td colspan="6">
                    <bold>Forum</bold>
                  </td>
                  <td colspan="3">
                    <break/>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Animal Health—General</td>
                  <td colspan="2">This forum is used to answer questions related to general pet health (treatment, parasites, infectious disease, etc).</td>
                  <td colspan="3">56 (100)</td>
                  <td colspan="2">“My 3 year old boxer has one <italic>eye</italic> that seems to droop and is a little redder than normal. [...] It has always been that way it could be a congenital abnormality such as <italic>entropion</italic>.”</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Animal Lovers Group</td>
                  <td colspan="2">This forum was previously used to chat about anything related to pets and animals.</td>
                  <td colspan="3">1 (100)</td>
                  <td colspan="2">“Birds are wonderful. In this state, they seem to <italic>frown</italic> on folks feeding them in the park too, it really irritates me, what would our world be like without those lovely creatures singing their happy song to us, I love them.”</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Animal-Surgery</td>
                  <td colspan="2">This forum was previously used to have questions answered by a veterinarian from PetDocsOnCall on all questions regarding animal surgery.</td>
                  <td colspan="3">2 (100)</td>
                  <td colspan="2">“My dog has ingrown <italic>eyelashes</italic>”</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Birds</td>
                  <td colspan="2">This forum was used to answer questions about pet birds.<sup>b</sup></td>
                  <td colspan="3">5 (100)</td>
                  <td colspan="2">“My three year old peacock has cloudy <italic>eyes.</italic> One <italic>eye</italic> in particular, the lid seems to linger and appears to bulge (slightly) when looking at him straight.”</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Cats</td>
                  <td colspan="2">This forum was used to answer questions about pet cats.<sup>b</sup></td>
                  <td colspan="3">113 (100)</td>
                  <td colspan="2">“I don’t know what my cat has got into but his left <italic>eye</italic> has been watering really bad and is red inside. It is now red on the right <italic>eye</italic> but just around where the <italic>lashes</italic> would be.”</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Dogs</td>
                  <td colspan="2">This forum was used to answer questions about pet dogs.<sup>b</sup></td>
                  <td colspan="3">153 (100)</td>
                  <td colspan="2">“Lumps on dogs <italic>eye lid</italic>”</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>Words referring to ophthalmology are italicized.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>These forums used to have questions answered by a veterinarian.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Posts Extraction and Filtering</title>
          <p>After filtering the threads, 129,393 posts associated with the resulting 22,195 threads remained, which then underwent additional layers of filtering for inclusion and exclusion (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Posts from 13,239 of the 22,195 threads were considered relevant and were therefore included because the thread title or question contained a relevant oculoplastic term (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>), which resulted in 44,882 included posts. An additional 1499 individual posts from 1090 other discussion threads also contained oculoplastic-related keywords (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>) and were therefore included in the analysis. The final corpus was composed of 46,381 posts within 14,329 threads, which were written between January 1, 1995, and December 18, 2019, in 273 forums.</p>
        </sec>
        <sec>
          <title>User Extraction</title>
          <p>These 46,381 posts were written by 18,319 users from 1995 to 2019. More specifically, 7458 posts (within 6346 threads) were written by 117 doctors, and 38,923 posts (within 13,788 threads) were written by 18,202 patients. Overall, 20.19% (3699/18,319) of users were male patients, 38.33% (7022/18,319) were female patients, 40.84% (7481/18,319) were patients who did not specify their sex, 0.41% (75/18,319) were male doctors, and 0.23% (42/18,319) were female doctors. A total of 5642 patients indicated their age. Their ages varied from 10 to 96 years, with an average of 44.8 years. A total of 6704 patients indicated their location (city, state, and/or country).</p>
        </sec>
      </sec>
      <sec>
        <title>Results From Keyword Processing</title>
        <sec>
          <title>Keyword Extraction</title>
          <p>Keyword extraction, sentiment analysis, and emotion analysis were performed using the IBM Watson NLU service, which generated 201,611 unique raw keywords, including 28,579 keywords from posts written by doctors and 184,890 keywords from posts written by patients, with some keywords common to both sets of posts (<xref rid="figure2" ref-type="fig">Figure 2</xref>). Further processing using the NLTK Python library grouped related keywords, resulting in 24,806 keywords from doctors’ posts and 156,080 keywords from patients’ posts. For instance, “eyes” became “eye,” “eyelids” became “eyelid,” and “eye lashes” became “eye lash.”</p>
        </sec>
        <sec>
          <title>Keyword Selection and Categorization</title>
          <p>Keywords that occurred at least 500 times in the corpus were included for analysis; 383 keywords were from patients’ posts and 54 keywords were from doctors’ posts. We grouped these keywords into nine relevant categories: body parts; medical signs, symptoms, and diseases; people; medication and treatment; procedures; complications; administration; aggravating and relieving factors; and others. Some of these categories were then subdivided into more precise clinical concepts. For example, the broad category <italic>body parts</italic> contained keywords related to the head, neck, upper limbs, thorax, and lower limbs. The category <italic>medical signs, symptoms, and diseases</italic> was subdivided by specialty (oculoplastics, ophthalmology, psychiatry, neurology, endocrinology, integumentary, immunology, cardiology, and gastroenterology). The <italic>people</italic> category contained references to eye care doctors, nonocular medical specialists, surgeons, family doctors, family members, friends, and other health care professionals (<xref rid="figure3" ref-type="fig">Figure 3</xref>) [<xref ref-type="bibr" rid="ref38">38</xref>].</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Nested bubble chart showing the top 500 keywords associated with patient posts and grouped into clinically relevant categories. The size of each bubble is proportional to the frequency of the keyword. The color of each bubble represents the most likely emotion associated with the keyword. The shade of each bubble is proportional to the likelihood of the emotion score; emotions that are more likely are in darker bubbles. APPT: appointment; BP: blood pressure; ED: eye doctor; HP: hypothyroidism; MG: myasthenia gravis.</p>
            </caption>
            <graphic xlink:href="jmir_v23i5e20803_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Similar keywords that were aggregated include the following examples: “itch” encompassing both “itch” and “itching,” “diagnosis” replacing “dx” and “diagnosis,” “eyelid” including both “eyelid” and “eye lid,” “eyebrow” (“eye brow” and “eyebrow”), “twitch” (“twitch” and “twitching”), “treatment” (“tx” and “treatment”), “non specified doctor” (“doctor,” “doc,” “dr,” “physician,” and “md”), and “ophthalmologist” (“ophthalmologist” and “ophthamologists” [sic]).</p>
        </sec>
      </sec>
      <sec>
        <title>Sentiment and Emotion Analysis</title>
        <p>Summary statistics were therefore performed using keyword groupings (<xref rid="figure3" ref-type="fig">Figures 3</xref> and <xref rid="figure4" ref-type="fig">4</xref>). Medical signs, symptoms, and diseases had the lowest overall sentiment scores (−0.598). Complications were highly associated with sadness (likelihood sadness score of 0.485). Forum posts mentioning body parts were related to sadness (likelihood sadness score of 0.416) and fear (likelihood fear score of 0.321). Administration was the category with the highest anger score (0.146).</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Top 8 groupings and their respective overall sentiment and emotion scores. The overall sentiment score reflects a positive, neutral, or negative sentiment, whereas the emotion score (anger, disgust, fear, joy, and sadness) represents how likely (%) the emotion is to be present.</p>
          </caption>
          <graphic xlink:href="jmir_v23i5e20803_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>We further analyzed sentiments and emotions by the forum subgroup. We compared the most popular forums with each other by analyzing the sentiment and emotion scores of their posts (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). All 6 forums had an overall negative sentiment score, the most negative being the <italic>Neurology</italic> forum with a score of −0.438. The <italic>Undiagnosed Symptoms</italic> forum had the highest sadness score (0.448). Posts from the <italic>Eye Care</italic> forum were the least likely to express fear, with a score of 0.260.</p>
        <p>We also analyzed all the posts from users who asked questions (ie, initiated new threads) on MedHelp. These posts were divided into two categories: the pre–doctor reply group and the post–doctor reply group. The pre–doctor reply group included all the questions, the self-replies, and replies to other users written by the initial user before a doctor replied. The post–doctor reply group included all the other posts written by the initial user after the first doctor replied. As seen in <xref ref-type="table" rid="table2">Table 2</xref>, the overall sentiment score was much more negative before the doctor replied. We can also see shifts in the emotion scores: anger, disgust, fear, and sadness decreased in likelihood, whereas joy became slightly more likely after the doctor replied.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Difference in sentiment and emotion scores between the posts written before and after a doctor replied.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="200"/>
            <col width="260"/>
            <col width="260"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Posts analyzed</td>
                <td>Pre–doctor reply group</td>
                <td>Post–doctor reply group</td>
                <td>Difference (post − pre)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Posts expressing the following sentiment</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Negative, n (%)</td>
                <td>1553 (92.22)</td>
                <td>1260 (49.55)</td>
                <td>−42.67%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Neutral, n (%)</td>
                <td>11 (0.65)</td>
                <td>110 (4.33)</td>
                <td>+3.67%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Positive, n (%)</td>
                <td>120 (7.13)</td>
                <td>1172 (46.09)</td>
                <td>+38.97%</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Scores</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overall sentiment</td>
                <td>−0.557</td>
                <td>0.0268</td>
                <td>+0.584</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Anger</td>
                <td>0.143</td>
                <td>0.109</td>
                <td>−0.0334</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disgust</td>
                <td>0.126</td>
                <td>0.0740</td>
                <td>−0.0505</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Fear</td>
                <td>0.364</td>
                <td>0.233</td>
                <td>−0.130</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Joy</td>
                <td>0.308</td>
                <td>0.348</td>
                <td>+0.0391</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sadness</td>
                <td>0.5210</td>
                <td>0.335</td>
                <td>−0.186</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Innovation</title>
        <p>This is the first paper providing a detailed methodology for preparing unstructured text data from web-based health discussion forums related to ophthalmology for sentiment and emotion analyses. We detailed the steps performed to quantify patients’ and doctors’ sentiments from web-based discussion forums: searching results, extracting a data corpus of threads and posts, cleaning the data, analyzing text using IBM Watson NLU, and aggregating and processing the important keywords from each post. Our goal was to explain these key steps and highlight the applicability of our methods to the field of medicine and the factors to consider in the process, notably the selection of search terms; understanding the latter’s different linguistic usages (eg, idioms); the adequate consideration of different forums; and the establishment of robust criteria for data cleaning, aggregation, and grouping of posts and keywords (eg, lowercasing, punctuation removal, and lemmatization). Our approach highlights the importance of considering the unique structure of discussions within web-based health forums, distinguishing between physician and patient posts and analyzing idiomatic language usage to determine text relevance in infoveillance studies, which we found to be important steps not commonly detailed in previous studies of web-based health forums [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>].</p>
      </sec>
      <sec>
        <title>Medical Application</title>
        <p>Analyses examining groupings (eg, administration; complications; procedures; medication and treatment; people; medical signs, symptoms, and diseases; time; and body parts), forum subgroups (eg, eye care, neurology, dermatology, thyroid disorders, multiple sclerosis, and undiagnosed symptoms), and patient-doctor interactions can enable researchers to provide key recommendations to physicians. In the oculoplastics data set, patients had a highly negative overall sentiment score and emotion score (anger, disgust, fear, and sadness) before the doctor replied (<xref ref-type="table" rid="table2">Table 2</xref>). To improve patient satisfaction, health care professionals can address their concerns by adapting their responses to the patients’ sentiments and emotions. These sentiments and emotions can be further broken down by grouping and forums. Each grouping can be addressed with different solutions, such as reducing appointment and waiting time; explaining medical signs, symptoms, and diseases; and reassuring patients’ concerns regarding specific procedures and body parts (<xref rid="figure4" ref-type="fig">Figure 4</xref>). Each forum’s scores indicate how the corresponding health care team (eg, neurology, endocrinology, and ophthalmology) must communicate with patients to better manage different emotions by predominantly addressing patients’ sadness, disgust, fear, or even joy (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p>
      </sec>
      <sec>
        <title>Challenges and Factors to Consider</title>
        <p>Several issues must be carefully considered when gathering data from internet sources and unstructured free text to ensure relevance to the desired topic. First, the selection of the search terms is critical when analyzing web-based content. A deep understanding of the chosen field along with its related terms (eg, symptoms, complications, and subfields) is crucial to establish a complete list that encompasses all the possible relevant thread discussions. Second, a thorough understanding of the linguistic usages of the search terms is critical for establishing adequate data cleaning algorithms (eg, removal of threads containing the search terms exclusively used as idioms and consideration of human speech variance in the filtering algorithm). There are many eye-related idioms in the English language that must be considered when analyzing web-based text for ophthalmology-related insights (eg, “bat an eyelid”); every specialty will have its own unique set of idioms related to anatomical parts or functions (eg, “break my heart” and “take my breath away”) that must be taken into consideration. The results can also differ according to the terms’ specificity: broader terms (eg, eyelids, eyebrows, and oculoplastics) encompass the oculoplastics field, whereas more specific terms (eg, blepharitis, entropion, and ectropion) refer to specific medical conditions in this field. It is recommended to choose all relevant search terms (broad and specific) to ensure exhaustive results. However, a robust and tailored filtering algorithm must be established to ensure a relevant data set that is not influenced by the initial results returned by any proprietary search algorithm for any platform.</p>
        <p>Indeed, every social media platform will have individual and proprietary search functions that may retrieve information irrelevant to the original query. Therefore, a careful and tailored process for further filtering is required to remove irrelevant results. Key decisions must be made on the filtering process (filtering by topic title, discussion thread, and/or individual post content). Establishing these filtering guidelines is crucial to ensure that the content of the posts selected is relevant and that the posts discarded do not contain relevant information. Basing the filtering algorithm on the relevancy of the thread topic allows for this methodology to be applied to many other social media platforms that often contain similar data structures (eg, on Facebook, Twitter, and Instagram, a main post (topic or title) is followed by comments (replies) related to the initial topic).</p>
        <p>Furthermore, the scope of the search must also be evaluated. Depending on the topic selected, forums outside of those dedicated to the primary specialty may also need to be included. In our study, we considered a wide variety of MedHelp forums outside the eye care forums as oculoplastics is a field at the intersection of ophthalmology and plastic surgery. The <italic>Eye Care</italic> forum is only one of the 273 forums that contained our relevant threads and posts (others include the <italic>Cosmetic Surgery</italic>, <italic>Dermatology</italic>, <italic>Neurology</italic>, and <italic>Thyroid Disorders</italic> forums). As we took all MedHelp forums into account during the extraction process, more constraints had to be established. For example, all forums related to animal care needed to be excluded.</p>
        <p>After carefully selecting individual posts on which sentiment analysis is performed, the keywords extracted by the program will be numerous and lexically repetitive. Therefore, care must be taken to normalize the results originally sourced from free text. Using NLP tools to process and group the keywords with the same clinical meaning is a crucial step to ensure that the analysis is performed on uniform and clean data. Following a systematic method for grouping related processed keywords, such as ours (all keywords with a frequency of at least 500 and keyword categorization by 2 reviewers), prevents biases from being introduced into the sentiment analysis and results.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Although the effects of users’ spatiotemporal characteristics on sentiment analyses in MedHelp have not been evaluated yet, studies have shown that these features can bias the results of sentiment analysis derived from tweets. Gore et al showed that sentiment analysis can yield biased measures related to population demographics at the municipal, state, and national levels [<xref ref-type="bibr" rid="ref41">41</xref>]. Another study demonstrated that an individual’s location throughout the day can also affect their tweets’ sentiment [<xref ref-type="bibr" rid="ref42">42</xref>]. These issues can be addressed by assessing the population represented by posts on the web. In the case of Twitter, only 15% of adults on the web regularly use Twitter, and those aged 18-29 years and minorities tend to be more highly represented on Twitter than in the general population [<xref ref-type="bibr" rid="ref43">43</xref>]. Although it is unclear what effect these spatial, temporal, and demographic factors may have on sentiment and emotion reflected in forum posts, they have the potential to affect these findings. We acknowledge that not all patients will rely on web-based forums to discuss their medical concerns or receive expert advice, especially the most vulnerable (older adults, minority, and socioeconomic groups).</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Despite these limitations, the internet is a major source of health-related information that is underused [<xref ref-type="bibr" rid="ref44">44</xref>]. In this paper, we describe an accessible, quick, and robust approach to sentiment analysis of patient data in social media that is relevant to a chosen medical topic, such as oculoplastics, and highlight the technical challenges encountered when preparing and analyzing the data. Regardless of the clinical questions examined, important factors to be considered during the application of this methodology include assessing the scope of the research; determining search terms and understanding their different linguistic usages; and implementing selection, filtering, and processing criteria for posts and keywords tailored to the results. This emerging methodology can be used as a valuable guide for clinicians and researchers who want to better understand patient attitudes toward and patient satisfaction with particular fields and procedures. The analysis of web-based forum discussions can be a quick, efficient, and robust method for gathering unstructured, diverse, and detailed opinions relevant to a chosen medical topic such as oculoplastics.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Initial search terms.</p>
        <media xlink:href="jmir_v23i5e20803_app1.docx" xlink:title="DOCX File , 31 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>List of patterns used to filter threads based on their titles or initial questions.</p>
        <media xlink:href="jmir_v23i5e20803_app2.docx" xlink:title="DOCX File , 32 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>List of patterns used to filter posts based on their content.</p>
        <media xlink:href="jmir_v23i5e20803_app3.docx" xlink:title="DOCX File , 31 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Top 6 forums and their respective overall sentiment and emotion scores. The overall sentiment score reflects a positive, neutral, or negative sentiment, whereas the emotion score (anger, disgust, fear, joy, and sadness) represents how likely (%) the emotion is to be present. These forums had the highest number of posts and threads (displayed in the table).</p>
        <media xlink:href="jmir_v23i5e20803_app4.docx" xlink:title="DOCX File , 16 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">NLTK</term>
          <def>
            <p>Natural Language Toolkit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NLU</term>
          <def>
            <p>Natural Language Understanding</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank Jerry Kurian for his expert opinion on the code and the reviewers for their valuable comments. Funding associated with this study includes T15 LM 007033 (SYW) and departmental support from the National Institutes of Health-National Eye Institute Grant P30-EY026877 (SYW and AYW), as well as the unrestricted department grant from Research to Prevent Blindness, Inc (SYW and AYW). There are no commercial relationships to disclose.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Street</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Haidet</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>How well do doctors know their patients? Factors affecting physician understanding of patients' health beliefs</article-title>
          <source>J Gen Intern Med</source>
          <year>2011</year>
          <month>01</month>
          <volume>26</volume>
          <issue>1</issue>
          <fpage>21</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20652759"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-010-1453-3</pub-id>
          <pub-id pub-id-type="medline">20652759</pub-id>
          <pub-id pub-id-type="pmcid">PMC3024116</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dawn</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>PP</given-names>
            </name>
          </person-group>
          <article-title>Patient expectations for medical and surgical care: a review of the literature and applications to ophthalmology</article-title>
          <source>Surv Ophthalmol</source>
          <year>2004</year>
          <volume>49</volume>
          <issue>5</issue>
          <fpage>513</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1016/j.survophthal.2004.06.004</pub-id>
          <pub-id pub-id-type="medline">15325196</pub-id>
          <pub-id pub-id-type="pii">S0039-6257(04)00111-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Smartphone ownership is growing rapidly around the world, but not always equally</article-title>
          <source>Pew Research Center</source>
          <year>2019</year>
          <access-date>2021-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/global/2019/02/05/smartphone-ownership-is-growing-rapidly-around-the-world-but-not-always-equally/">https://www.pewresearch.org/global/2019/02/05/smartphone-ownership-is-growing-rapidly-around-the-world-but-not-always-equally/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <article-title>Global digital population as of January 2021</article-title>
          <source>Statista</source>
          <year>2020</year>
          <access-date>2020-04-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.statista.com/statistics/617136/digital-population-worldwide/">https://www.statista.com/statistics/617136/digital-population-worldwide/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Duggan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Health online 2013</article-title>
          <source>Pew Research Center</source>
          <year>2013</year>
          <access-date>2021-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2013/01/15/health-online-2013/">https://www.pewresearch.org/internet/2013/01/15/health-online-2013/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sadah</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Shahbazi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wiley</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Hristidis</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Demographic-based content analysis of web-based health-related social media</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <month>06</month>
          <day>13</day>
          <volume>18</volume>
          <issue>6</issue>
          <fpage>e148</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2016/6/e148/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5327</pub-id>
          <pub-id pub-id-type="medline">27296242</pub-id>
          <pub-id pub-id-type="pii">v18i6e148</pub-id>
          <pub-id pub-id-type="pmcid">PMC4923586</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pournaras</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nikolic</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Omerzel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Helbing</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Engineering democratization in internet of things data analytics</article-title>
          <source>Proceedings of the IEEE 31st International Conference on Advanced Information Networking and Applications (AINA)</source>
          <year>2017</year>
          <conf-name>IEEE 31st International Conference on Advanced Information Networking and Applications (AINA)</conf-name>
          <conf-date>March 27-29, 2017</conf-date>
          <conf-loc>Taipei, Taiwan</conf-loc>
          <pub-id pub-id-type="doi">10.1109/aina.2017.15</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mavragani</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>04</month>
          <day>28</day>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>e16206</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/4/e16206/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16206</pub-id>
          <pub-id pub-id-type="medline">32310818</pub-id>
          <pub-id pub-id-type="pii">v22i4e16206</pub-id>
          <pub-id pub-id-type="pmcid">PMC7189791</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Das</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Faxvaag</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>What influences patient participation in an online forum for weight loss surgery? A qualitative case study</article-title>
          <source>Interact J Med Res</source>
          <year>2014</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>e4</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.i-jmr.org/2014/1/e4/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/ijmr.2847</pub-id>
          <pub-id pub-id-type="medline">24509408</pub-id>
          <pub-id pub-id-type="pii">v3i1e4</pub-id>
          <pub-id pub-id-type="pmcid">PMC3936279</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dosani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Harding</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Online groups and patient forums</article-title>
          <source>Curr Psychiatry Rep</source>
          <year>2014</year>
          <month>11</month>
          <volume>16</volume>
          <issue>11</issue>
          <fpage>507</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25273668"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11920-014-0507-3</pub-id>
          <pub-id pub-id-type="medline">25273668</pub-id>
          <pub-id pub-id-type="pmcid">PMC4182653</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haselmayer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jenny</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis of political communication: combining a dictionary approach with crowdcoding</article-title>
          <source>Qual Quant</source>
          <year>2017</year>
          <volume>51</volume>
          <issue>6</issue>
          <fpage>2623</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29070915"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11135-016-0412-4</pub-id>
          <pub-id pub-id-type="medline">29070915</pub-id>
          <pub-id pub-id-type="pii">412</pub-id>
          <pub-id pub-id-type="pmcid">PMC5635074</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ranco</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Aleksovski</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Caldarelli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Grčar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mozetič</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>The effects of Twitter sentiment on stock price returns</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>9</issue>
          <fpage>e0138441</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0138441"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0138441</pub-id>
          <pub-id pub-id-type="medline">26390434</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-24174</pub-id>
          <pub-id pub-id-type="pmcid">PMC4577113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Htay</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Lynn</surname>
              <given-names>KT</given-names>
            </name>
          </person-group>
          <article-title>Extracting product features and opinion words using pattern knowledge in customer reviews</article-title>
          <source>ScientificWorldJournal</source>
          <year>2013</year>
          <volume>2013</volume>
          <fpage>394758</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2013/394758"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2013/394758</pub-id>
          <pub-id pub-id-type="medline">24459430</pub-id>
          <pub-id pub-id-type="pmcid">PMC3888732</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garcia-Rudolph</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Laxe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Saurí</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bernabeu Guitart</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Stroke survivors on Twitter: sentiment and topic analysis from a gender perspective</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>08</month>
          <day>26</day>
          <volume>21</volume>
          <issue>8</issue>
          <fpage>e14077</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/8/e14077/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14077</pub-id>
          <pub-id pub-id-type="medline">31452514</pub-id>
          <pub-id pub-id-type="pii">v21i8e14077</pub-id>
          <pub-id pub-id-type="pmcid">PMC6732975</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnsen</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Eggesvik</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Rørvik</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Hanssen</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Wynn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kummervold</surname>
              <given-names>PE</given-names>
            </name>
          </person-group>
          <article-title>Differences in emotional and pain-related language in Tweets about dentists and medical doctors: text analysis of Twitter content</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2019</year>
          <month>02</month>
          <day>06</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>e10432</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2019/1/e10432/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10432</pub-id>
          <pub-id pub-id-type="medline">30724738</pub-id>
          <pub-id pub-id-type="pii">v5i1e10432</pub-id>
          <pub-id pub-id-type="pmcid">PMC6381402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Talbot</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Kalisch</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Christoffersen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Forbell</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
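          <article-title>Natural language understanding performance &amp; use considerations in virtual medical encounters</article-title>
          <source>Stud Health Technol Inform</source>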
          <year>2016</year>
          <volume>220</volume>
          <fpage>407</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="medline">27046614</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Denecke</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis in medical settings: new opportunities and challenges</article-title>
          <source>Artif Intell Med</source>
          <year>2015</year>
          <month>05</month>
          <volume>64</volume>
          <issue>1</issue>
          <fpage>17</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2015.03.006</pub-id>
          <pub-id pub-id-type="medline">25982909</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(15)00029-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zunic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Corcoran</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis in health and well-being: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>01</month>
          <day>28</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>e16023</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/1/e16023/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16023</pub-id>
          <pub-id pub-id-type="medline">32012057</pub-id>
          <pub-id pub-id-type="pii">v8i1e16023</pub-id>
          <pub-id pub-id-type="pmcid">PMC7013658</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Pershing</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Understanding patient attitudes toward multifocal intraocular lenses in online medical forums through sentiment analysis</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2019</year>
          <month>08</month>
          <day>21</day>
          <volume>264</volume>
          <fpage>1378</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI190453</pub-id>
          <pub-id pub-id-type="medline">31438152</pub-id>
          <pub-id pub-id-type="pii">SHTI190453</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: framework for an emerging set of public health informatics methods to analyze search, communication and publication behavior on the internet</article-title>
          <source>J Med Internet Res</source>
          <year>2009</year>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2009/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1157</pub-id>
          <pub-id pub-id-type="medline">19329408</pub-id>
          <pub-id pub-id-type="pii">v11i1e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC2762766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Ping</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Avis</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Ip</surname>
              <given-names>EH</given-names>
            </name>
          </person-group>
          <article-title>Symptom clusters in women with breast cancer: an analysis of data from social media and a research study</article-title>
          <source>Qual Life Res</source>
          <year>2016</year>
          <month>03</month>
          <volume>25</volume>
          <issue>3</issue>
          <fpage>547</fpage>
          <lpage>57</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26476836"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11136-015-1156-7</pub-id>
          <pub-id pub-id-type="medline">26476836</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11136-015-1156-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC5129624</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Filtering big data from social media—building an early warning system for adverse drug reactions</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>04</month>
          <volume>54</volume>
          <fpage>230</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00013-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.01.011</pub-id>
          <pub-id pub-id-type="medline">25688695</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00013-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Dhillon</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Women's obstetric and reproductive health care discourse in online forums: perceived access and quality pre- and post-Affordable Care Act</article-title>
          <source>Prev Med</source>
          <year>2019</year>
          <month>07</month>
          <volume>124</volume>
          <fpage>50</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ypmed.2019.04.013</pub-id>
          <pub-id pub-id-type="medline">31028754</pub-id>
          <pub-id pub-id-type="pii">S0091-7435(19)30146-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castaneda</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sales</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Osborne</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Corriere</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Scope, themes, and medical accuracy of eHealth peripheral artery disease community forums</article-title>
          <source>Ann Vasc Surg</source>
          <year>2019</year>
          <month>01</month>
          <volume>54</volume>
          <fpage>92</fpage>
          <lpage>102</lpage>
          <pub-id pub-id-type="doi">10.1016/j.avsg.2018.09.004</pub-id>
          <pub-id pub-id-type="medline">30267913</pub-id>
          <pub-id pub-id-type="pii">S0890-5096(18)30769-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <source>MedHelp</source>
          <access-date>2021-05-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medhelp.org/">https://www.medhelp.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hagan</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Kutryb</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Internet eye questions</article-title>
          <source>Ophthalmology</source>
          <year>2009</year>
          <month>10</month>
          <volume>116</volume>
          <issue>10</issue>
          <fpage>2036</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ophtha.2009.05.008</pub-id>
          <pub-id pub-id-type="medline">19800523</pub-id>
          <pub-id pub-id-type="pii">S0161-6420(09)00518-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>Python</article-title>
          <source>Python Software Foundation</source>
          <access-date>2021-05-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.python.org/">https://www.python.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <source>Beautiful Soup</source>
          <year>2007</year>
          <access-date>2020-08-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.crummy.com/software/BeautifulSoup/bs4/doc/">https://www.crummy.com/software/BeautifulSoup/bs4/doc/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hipp</surname>
              <given-names>RD</given-names>
            </name>
          </person-group>
          <source>SQLite</source>
          <year>2020</year>
          <access-date>2021-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sqlite.org/index.html">https://www.sqlite.org/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Trinh</surname>
              <given-names>XV</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>SY</given-names>
            </name>
          </person-group>
          <article-title>Social Media Sentiment Emotion Analysis</article-title>
          <source>Zenodo</source>
          <access-date>2020-08-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/eyelovedata/social-media-sentiment-emotion-analysis">https://github.com/eyelovedata/social-media-sentiment-emotion-analysis</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bird</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Loper</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing with Python</article-title>
          <source>O’Reilly</source>
          <year>2009</year>
          <access-date>2020-08-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.datascienceassn.org/sites/default/files/Natural%20Language%20Processing%20with%20Python.pdf">http://www.datascienceassn.org/sites/default/files/Natural%20Language%20Processing%20with%20Python.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <article-title>Watson Natural Language Understanding</article-title>
          <source>IBM Corp</source>
          <year>2020</year>
          <access-date>2020-04-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ibm.com/ca-en/marketplace/natural-language-understanding">https://www.ibm.com/ca-en/marketplace/natural-language-understanding</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>The Jupyter Notebook</article-title>
          <source>The Jupyter Team</source>
          <year>2015</year>
          <access-date>2020-08-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jupyter-notebook.readthedocs.io/en/stable/">https://jupyter-notebook.readthedocs.io/en/stable/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bird</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Loper</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing with Python</article-title>
          <source>Natural Language Toolkit</source>
          <year>2009</year>
          <access-date>2020-08-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nltk.org/">https://www.nltk.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <source>NumPy</source>
          <access-date>2020-08-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://numpy.org/">https://numpy.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <article-title>Pandas</article-title>
          <source>Qeios</source>
          <access-date>2021-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pandas.pydata.org/">https://pandas.pydata.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>WordNet</article-title>
          <source>Princeton University</source>
          <year>2010</year>
          <access-date>2021-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://wordnet.princeton.edu/citing-wordnet">https://wordnet.princeton.edu/citing-wordnet</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <source>Oculoplastics keywords</source>
          <access-date>2020-05-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://oculoplastics-keywords.herokuapp.com/">https://oculoplastics-keywords.herokuapp.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sadah</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hristidis</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Talbot</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Health effects associated with electronic cigarette use: automated mining of online forums</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>01</month>
          <day>03</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>e15684</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/1/e15684/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/15684</pub-id>
          <pub-id pub-id-type="medline">31899452</pub-id>
          <pub-id pub-id-type="pii">v22i1e15684</pub-id>
          <pub-id pub-id-type="pmcid">PMC6969389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McRoy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ruddy</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Haddad</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Assessing unmet information needs of breast cancer survivors: exploratory study of online health forums using text classification and retrieval</article-title>
          <source>JMIR Cancer</source>
          <year>2018</year>
          <month>05</month>
          <day>15</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>e10</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancer.jmir.org/2018/1/e10/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/cancer.9050</pub-id>
          <pub-id pub-id-type="medline">29764801</pub-id>
          <pub-id pub-id-type="pii">v4i1e10</pub-id>
          <pub-id pub-id-type="pmcid">PMC5974460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gore</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Diallo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Padilla</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>You are what you Tweet: connecting the geographic variation in America's obesity rate to Twitter content</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>9</issue>
          <fpage>e0133505</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0133505"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0133505</pub-id>
          <pub-id pub-id-type="medline">26332588</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-02269</pub-id>
          <pub-id pub-id-type="pmcid">PMC4557976</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Padilla</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kavak</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lynch</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gore</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Diallo</surname>
              <given-names>SY</given-names>
            </name>
          </person-group>
          <article-title>Temporal and spatiotemporal investigation of tourist attraction visit sentiment on Twitter</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>e0198857</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0198857"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0198857</pub-id>
          <pub-id pub-id-type="medline">29902270</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-02998</pub-id>
          <pub-id pub-id-type="pmcid">PMC6002102</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brenner</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Twitter use 2012</source>
          <year>2012</year>
          <access-date>2021-04-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2012/05/31/twitter-use-2012/">https://www.pewresearch.org/internet/2012/05/31/twitter-use-2012/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carrillo-de-Albornoz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rodríguez Vidal</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Plaza</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Feature engineering for sentiment analysis in e-health forums</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <volume>13</volume>
          <issue>11</issue>
          <fpage>e0207996</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0207996"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0207996</pub-id>
          <pub-id pub-id-type="medline">30496232</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-03189</pub-id>
          <pub-id pub-id-type="pmcid">PMC6264154</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
