<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v24i8e29186</article-id>
      <article-id pub-id-type="pmid">35917151</article-id>
      <article-id pub-id-type="doi">10.2196/29186</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Identifying Patients With Inflammatory Bowel Disease on Twitter and Learning From Their Personal Experience: Retrospective Cohort Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Office</surname>
            <given-names>JMIR Publications Editorial</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>Jinying</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wang</surname>
            <given-names>Hanyin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kiritchenko</surname>
            <given-names>Svetlana</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Stemmer</surname>
            <given-names>Maya</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Industrial Engineering and Management</institution>
            <institution>Ben-Gurion University of the Negev</institution>
            <addr-line>POB 653</addr-line>
            <addr-line>Beer-Sheva, 84105</addr-line>
            <country>Israel</country>
            <phone>972 8 6461434</phone>
            <email>mayast@post.bgu.ac.il</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7572-4327</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Parmet</surname>
            <given-names>Yisrael</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2071-7338</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ravid</surname>
            <given-names>Gilad</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0471-6538</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Industrial Engineering and Management</institution>
        <institution>Ben-Gurion University of the Negev</institution>
        <addr-line>Beer-Sheva</addr-line>
        <country>Israel</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Maya Stemmer <email>mayast@post.bgu.ac.il</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>2</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>24</volume>
      <issue>8</issue>
      <elocation-id>e29186</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>5</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>10</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>20</day>
          <month>5</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Maya Stemmer, Yisrael Parmet, Gilad Ravid. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 02.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2022/8/e29186" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Patients use social media as an alternative information source, where they share information and provide social support. Although large amounts of health-related data are posted on Twitter and other social networking platforms each day, research using social media data to understand chronic conditions and patients’ lifestyles is limited.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>In this study, we contributed to closing this gap by providing a framework for identifying patients with inflammatory bowel disease (IBD) on Twitter and learning from their personal experiences. We enabled the analysis of patients’ tweets by building a classifier of Twitter users that distinguishes patients from other entities. This study aimed to uncover the potential of using Twitter data to promote the well-being of patients with IBD by relying on the wisdom of the crowd to identify healthy lifestyles. We sought to leverage posts describing patients’ daily activities and their influence on their well-being to characterize lifestyle-related treatments.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>In the first stage of the study, a machine learning method combining social network analysis and natural language processing was used to automatically classify users as patients or not. We considered 3 types of features: the user’s behavior on Twitter, the content of the user’s tweets, and the social structure of the user’s network. We compared the performances of several classification algorithms within 2 classification approaches. One classified each tweet and deduced the user’s class from their tweet-level classification. The other aggregated tweet-level features to user-level features and classified the users themselves. Different classification algorithms were examined and compared using 4 measures: precision, recall, F1 score, and the area under the receiver operating characteristic curve. In the second stage, a classifier from the first stage was used to collect patients’ tweets describing the different lifestyles patients adopt to deal with their disease. Using IBM Watson Service for entity sentiment analysis, we calculated the average sentiment of 420 lifestyle-related words that patients with IBD use when describing their daily routine.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Both classification approaches showed promising results. Although the precision rates were slightly higher for the tweet-level approach, the recall and area under the receiver operating characteristic curve of the user-level approach were significantly better. Sentiment analysis of tweets written by patients with IBD identified frequently mentioned lifestyles and their influence on patients’ well-being. The findings reinforced what is known about suitable nutrition for IBD as several foods known to cause inflammation were pointed out in negative sentiment, whereas relaxing activities and anti-inflammatory foods surfaced in a positive context.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study suggests a pipeline for identifying patients with IBD on Twitter and collecting their tweets to analyze the experiential knowledge they share. These methods can be adapted to other diseases and enhance medical research on chronic conditions.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>patient identification</kwd>
        <kwd>inflammatory bowel disease</kwd>
        <kwd>IBD</kwd>
        <kwd>user classification</kwd>
        <kwd>Twitter</kwd>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
        <kwd>sentiment analysis</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Social networking sites and web-based communities have served as alternative information sources for patients in recent years. Patients everywhere use social media to share health and treatment information, learn from each other’s experiences, and provide social support. Mining these informative conversations may shed some light on patients’ ways of life and support research on chronic conditions.</p>
        <p>In recent years, text mining and social network analysis have been used to detect mentions of health on Twitter [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>] or to track the spread of the COVID-19 pandemic and symptoms [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Regarding chronic conditions, previous research has focused on analyzing patients’ tweets and uncovering their Twitter community [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. Although a relatively large amount of research has been dedicated to diabetes or cancer, research on inflammatory bowel disease (IBD) is only just starting to consolidate.</p>
        <p>IBD is a chronic inflammatory condition of the digestive system characterized by flares and remission states. The 2 primary diseases identified with IBD, Crohn disease and ulcerative colitis, are usually diagnosed in young patients (in the age range of 15-30 years). The incidence of IBD is rapidly increasing, and it has evolved into a global disease [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <p>There are no medications or surgical procedures that can cure IBD. Treatment options can only help with symptoms, and they affect each patient differently. They involve prescription drugs and lifestyle-related solutions such as diets and therapies. Symptoms include abdominal pain, diarrhea, and fatigue, and severe cases may result in hospitalization or surgical interventions [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. As chronic bowel diseases, both Crohn disease and ulcerative colitis require day-to-day care for drug consumption and special nutrition.</p>
        <p>Patients describe IBD as an embarrassing disease that causes the immediate disruption of daily activities. They experience difficulties in adjusting to the changes it entails and consider themselves different from their peers. As IBD is characterized by frequent bowel movements, people do not hasten to share their disease with others [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. According to patients with IBD, part of the embarrassment can be attributed to a lack of public awareness. Outsiders cannot see that a person’s stomach hurts or that their bowels are scarred. The disease is invisible, and others might doubt that it exists [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</p>
        <p>The embarrassment caused by IBD and the need to confide in people with similar experiences help explain the creation of IBD-related communities on Twitter. By overcoming space and distance, Twitter users form a community that disregards physical boundaries or immobility. A sense of common ground can help break down barriers and enable conversation, increasing a person’s willingness to share [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. It may be easier to consult with other patients who can relate and better understand the situation based on personal experience. One can identify more closely with user stories similar to one’s own and embrace their advice more easily [<xref ref-type="bibr" rid="ref24">24</xref>]. When people disclose health information on Twitter, they expose themselves to a large variety of opinions and reduce the uncertainty about their disease [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
        <p>Owing to the nature of IBD and its influence on the digestive system, patients with IBD are forced to deal with their disease daily, adhere to strict dietary regimens, and maintain a calm routine. Changes in nutrition or physical activity, which are currently tested by trial and error, result in a long and excruciating process for the patients. We can learn from their personal experiences and provide an additional foundation for existing medical knowledge of the disease by collecting and analyzing patients’ social media data. Complementary recommendations based on the wisdom of the crowd can ease patients’ lives and shorten the process of finding the right lifestyle for them.</p>
      </sec>
      <sec>
        <title>Objective and Contribution</title>
        <p>This study aimed to uncover the potential of using Twitter data to promote the well-being of patients with IBD by collecting and analyzing the personal experiences they share about their disease. We suggested a framework for identifying patients with IBD on Twitter and examining the content they share regarding their disease. We started by building a user classifier that distinguishes patients from other entities who talk about IBD on Twitter and then used the classifier to collect patients’ tweets and explore the lifestyle-related treatments they undergo to cope with their disease.</p>
        <p>This study focused on creating a pipeline for using Twitter data for identifying patients with IBD and exploring the information they share. Although each part of this study can be extended by trying other classification methods or enriching the analysis of the patients’ tweets, this study shows the potential of using Twitter data to enhance medical knowledge of IBD. We showed that patients can be identified on Twitter based on their communication even using classic, simple classification algorithms. We compared the performances of 2 different approaches for user classification—a single instance (SI) learning approach and a multiple instance (MI) learning approach—and showed the benefits of using the latter. The preliminary analysis in the second part of this study showed that it is possible to derive health-related insights from self-reported tweets by patients.</p>
        <p>Using the suggested framework to identify more patients and collect more of their data could uncover their sentiments toward the treatments they try or explore other aspects of the disease, such as its influence on patients’ quality of life. The framework can also feasibly be extended to other chronic conditions. It can be used to compare discussion patterns of patients with IBD with those of the general population or of patients with other chronic conditions.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <sec>
          <title>Twitter and Health</title>
          <p>The study of social media in the context of health and well-being continues to position Twitter as a new medium for disseminating health-related information. Health-related tweets range from a simple toothache to more severe and chronic diseases such as diabetes, asthma, or cancer [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Patients with amyotrophic lateral sclerosis use Twitter as a means of communication, and local health departments in the United States use Twitter to educate and disseminate information related to diabetes [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. Even a sensitive disease such as HIV is discussed on Twitter [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>]. Communication patterns regarding who tweets about what and why vary by disease [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
          <p>Twitter is a powerful tool for disseminating health information and an accessible platform for patients needing immediate social support or relief. It provides a collaborative environment for health-related conversations where patients with chronic illnesses share their health status daily. They use Twitter to exchange knowledge about lifestyle implications or better understand a medical procedure. Through Twitter, they can easily and conveniently reach a large audience and various opinions [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
          <p>In total, 2 previous studies have presented models for detecting personal health mentions on Twitter and shown promising, scalable results [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. However, their goal differs from ours as they considered all tweets that discussed a specific person’s health condition as positive. In our study, we sought to identify patients with a specific disease. We not only classified tweets written by patients but also classified the users themselves.</p>
        </sec>
        <sec>
          <title>Communication Patterns on Twitter</title>
          <p>Different types of users communicate differently on Twitter. They connect differently with others, have different tweeting habits, and differ in style and linguistic content. Studying the conversational connections between Twitter users and text mining their tweets can help classify users based on their characteristics and identify different types of users [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref38">38</xref>].</p>
          <p>Private individuals reflect mainly on their personal experiences or sentiments and tend to engage with others. They are both frequently mentioned and frequently mentioning other users. By contrast, organizations often point to external information sources via URLs and are not that active at connecting with others. They are frequently mentioned in tweets, perhaps as sources of information, but are much less inclined to mention other users [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>].</p>
          <p>By analyzing a user’s screen name (ie, the username of their Twitter account) or their biography (ie, their Twitter user description), one can determine whether the user is an ordinary individual or an organization and reveal latent user properties [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>].</p>
          <p>Our study relies on those previous findings and constructs classification features that help differentiate patients with IBD from other users who tweet about the disease. We adapted and extended previous methods to cope with the different task of identifying patients with IBD on Twitter.</p>
        </sec>
        <sec>
          <title>Twitter and IBD</title>
          <p>Exploring the entities that engage in IBD-related discussions on Twitter reveals that patients with IBD are the most common type of users who talk about IBD on Twitter [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. Patients with IBD use Twitter for sharing personal experiences and seeking social support. They exchange thoughts about symptoms and medications and recommend treatments to one another [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. By sharing their life experiences with the disease on Twitter, patients fight disease invisibility and raise public awareness of IBD [<xref ref-type="bibr" rid="ref47">47</xref>].</p>
          <p>Perez et al [<xref ref-type="bibr" rid="ref48">48</xref>] explored the IBD community on Twitter and identified the types of users who talk about the disease and the key topics they discuss. They categorized users based on their Twitter profiles by analyzing their screen names and biographies. In our study, we investigated a large set of classification features and suggested a model to detect patients with IBD on Twitter based on the way they communicate and the content they share.</p>
          <p>Patients with IBD tend to be more emotional and negative than patients with other chronic conditions [<xref ref-type="bibr" rid="ref49">49</xref>]. They usually express a negative sentiment when they talk about the disease and its symptoms but positively address the diets and drugs that help manage them [<xref ref-type="bibr" rid="ref48">48</xref>]. Patients who engage in tweets offering social support are more likely to post positive tweets [<xref ref-type="bibr" rid="ref50">50</xref>].</p>
          <p>Unlike previous research related to patients’ sentiments on Twitter [<xref ref-type="bibr" rid="ref48">48</xref>-<xref ref-type="bibr" rid="ref50">50</xref>], we focused our research on entity sentiment rather than the sentiment of the entire tweet. By analyzing patients’ sentiments toward specific keywords related to nutrition and fitness, we uncovered the sentiments toward certain lifestyles that influence the disease.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>This study was conducted in 2 main stages. In the Patient Identification section, we built a user classifier that distinguishes patients from other entities who talk about IBD on Twitter. We considered three types of classification features: (1) features extracted from the user’s activity on Twitter, (2) the content of the user’s tweets, and (3) the social structure of the user’s network. We compared the performances of several classification algorithms within 2 classification approaches: one that starts by classifying tweets separately and then deduces the user’s class from their tweet-level classification and one that starts by aggregating tweet-level features to user-level features and then classifies the users themselves.</p>
        <p>In the Analyzing Patients’ Tweets section covering the second stage of the study, we derived insights regarding IBD from the personal experiences that patients share on Twitter. We collected lifestyle-related tweets by querying the Twitter application programming interface (API) for special keywords related to nutrition or fitness. We then filtered their authors using a classifier from the first stage of the study to obtain a collection of tweets where patients with IBD describe the different diets and physical activities they adopt to deal with their disease. We identified frequently mentioned lifestyles and used IBM Watson Service for entity sentiment analysis to assess their effectiveness.</p>
        <p><xref rid="figure1" ref-type="fig">Figures 1</xref> and <xref rid="figure2" ref-type="fig">2</xref> describe the general flow of the 2 main stages of the study. <xref rid="figure1" ref-type="fig">Figure 1</xref> describes how we used Twitter data to classify users and identify patients with IBD. <xref rid="figure2" ref-type="fig">Figure 2</xref> demonstrates how we used the classification to analyze patients’ tweets.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The general workflow of the first stage of the study: building a classifier of Twitter users for identifying patients with inflammatory bowel disease (IBD).</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e29186_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The general workflow of the second stage of the study: using the classification from the first stage for analyzing patients’ tweets. IBD: inflammatory bowel disease.</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e29186_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Patient Identification</title>
        <sec>
          <title>Data Collection and Preparation</title>
          <p>We used the Twitter Search API to collect 10 days of IBD-related tweets (from February 11, 2018, at noon to February 21, 2018, at noon). We used the OR operator to search for at least one of 3 keywords: <italic>crohn</italic>, <italic>colitis</italic>, and <italic>#IBD</italic>. The abbreviation IBD was searched as a hashtag to avoid news-related tweets by the Investor’s Business Daily Editorials account, which is usually marked with <italic>IBD</italic>. We limited the search to tweets written in English and collected 2045 tweets.</p>
          <p>The 722 authors of the collected tweets were then manually classified as patients (1) or not (0). In total, 3 different annotators, the authors of this paper (MS, YP, and GR), carried out the labeling process and labeled the users based on their tweets. Each user received a tag of 1 if they had at least one tweet revealing their illness and a tag of 0 otherwise (ie, if none of their tweets suggested that they were patients with IBD).</p>
          <p>Regarding 655 users (n=181, 27.6% patients and n=474, 72.4% other users), the annotators were in complete agreement, and their labels were set. To settle the dispute regarding the other 9.3% (67/722) of the users, the annotators challenged their tweet-based decisions by considering the users’ screen names and biographies and reviewing their timelines if necessary. Considering the new data, of the 67 remaining users, 45 (67%) were classified as patients after explicitly mentioning their illness in their biographies or timelines. A total of 12% (8/67) talked about others who were sick, and the annotators unanimously agreed that they were not patients with IBD themselves. Regarding the remaining 21% (14/67) of the users, the annotators did not reach a consensus and, therefore, the users were omitted from the data set. The labeling process ended with a collection of 708 tagged users: 226 (31.9%) patients and 482 (68.1%) nonpatients.</p>
          <p>To train the tweet-level classifiers, we had to annotate the tweets manually as well. We addressed the tweets collected in the original search query (in February 2018) and excluded retweets (RTs) from the collection. As the purpose was to identify patients, we were not interested in reshared content and only considered the user’s tendency to RT as a behavioral classification feature. After excluding RTs and the 14 users for whom we did not reach an annotation consensus, we were left with 1687 tweets. To consider the users’ biographies as we did when annotating users, we added each biography as another <italic>tweet</italic> by its author. A total of 83.5% (591/708) of the users had nonempty biographies, and the process resulted in a collection of 2278 <italic>tweets</italic>.</p>
          <p>During the annotation process, we wanted to determine whether a certain tweet revealed that the user was a patient with IBD. Tweets that unambiguously implied that their authors were patients with IBD received a tag of 1, and all others received a tag of 0. As we had already annotated the users, all 1638 tweets written by nonpatient users automatically received a tag of 0. The 3 annotators (MS, YP, and GR) then manually classified all the tweets written by patients. A total of 346 tweets were unanimously classified as 1, and 288 tweets were unanimously classified as 0. The annotators did not reach a consensus on 6 tweets (written by 6 different users), and they were excluded from the collection. All 6 users had at least one more tweet and, therefore, none of them were excluded entirely from our data set. Finally, we reached a collection of 2272 tweets, of which 346 (15.23%) explicitly revealed their authors’ illness.</p>
          <p>To enrich our data, we collected another week of tweets (from June 10, 2018, at noon to June 17, 2018, at noon) for each tagged user, this time without additional filtering. In the months that had passed, 6.6% (47/708) of the users had either been suspended by Twitter or had changed their accounts to private, and their data were no longer available for collection. The additional week was collected for the other 93.4% (661/708) of the users, and the process resulted in a data set of 82,884 tweets overall written by 194 patients and 467 nonpatients. We excluded the same 47 users from the tweet data set as well, and the final data set contained 2204 tweets, with 325 (14.75%) positive tweets.</p>
        </sec>
        <sec>
          <title>MI Learning Approach</title>
          <p>Traditional classification problems are supervised learning problems in which one receives a collection of individually labeled instances and tries to predict the class label for new instances. MI learning, by contrast, is a supervised learning approach in which each learning example is a <italic>bag of instances</italic> associated with 1 label, and the task is to predict the labels for unseen bags [<xref ref-type="bibr" rid="ref51">51</xref>].</p>
          <p>Previous research related to identifying health mentions on Twitter has relied on traditional supervised learning to determine whether a <italic>tweet</italic> discusses a health condition [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. However, we wished to determine whether <italic>patients</italic> can be identified on Twitter and not examine the tweets separately. Our unique task and the unbalanced structure of our data were compatible with an MI learning approach—we had 661 users and a different number of tweets posted by each of them. Positive tags (patients) were determined collectively by finding at least one piece of evidence that the user had IBD; negative tags (nonpatients) meant that all the user’s evidence suggested otherwise or, rather, was not sufficient for a positive tag.</p>
          <p>We used the metadata-based MI approach and extracted a vector of metadata for each bag (user) that was not related to any specific instance (tweet) [<xref ref-type="bibr" rid="ref52">52</xref>]. The Classification Features section explains how we applied feature engineering techniques to generate features that characterize the users themselves and not just their tweets.</p>
          <p>To assess the effectiveness of using this collective approach, we compared the results of 5 standard classification algorithms in both user- and tweet-level classification, as explained in detail in the Classification Models section.</p>
        </sec>
        <sec>
          <title>Classification Features</title>
          <sec>
            <title>Overview</title>
            <p>Rao et al [<xref ref-type="bibr" rid="ref38">38</xref>] and Pennacchiotti and Popescu [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>] showed that Twitter users’ demographics and political views could be distinguished by considering 3 types of user classification features: behavioral features (features extracted from the user’s activity on Twitter), linguistic features (features extracted from the content of the user’s tweets), and social structure features (features describing the user’s social network). We followed their work and adapted these types to our different domain of distinguishing patients with IBD from others who talk about the disease. We also integrated MI learning into our classification setting, which was not part of their research. We constructed a set of classification features for each feature type, as explained in detail in the following sections and summarized in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
            <table-wrap position="float" id="table1">
              <label>Table 1</label>
              <caption>
                <p>Summary of classification features and their types.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="30"/>
                <col width="30"/>
                <col width="700"/>
                <col width="240"/>
                <thead>
                  <tr valign="top">
                    <td colspan="3">User classification feature, feature level, and features</td>
                    <td>Type</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td colspan="4">
                      <bold>Behavioral features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td colspan="3">
                      <bold>Tweet-level features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Tweet counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Retweet counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Retweet to tweet ratio</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>IBD<sup>a</sup> flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>User-level IBD ratio</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Crohn flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>User-level Crohn ratio</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Colitis flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>User-level colitis ratio</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td colspan="3">
                      <bold>User-level features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Tweet counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Retweet counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Retweet to tweet ratio</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>IBD counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-IBD flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>IBD ratio</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Crohn counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-Crohn flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Crohn ratio</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Colitis counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-colitis flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Colitis ratio</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="4">
                      <bold>Linguistic features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td colspan="3">
                      <bold>Tweet-level features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Emoji counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Interjection counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Profanity counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Mention counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Hashtag counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>URL flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>First-person flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Number of words</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Number of characters</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Polarity</td>
                    <td>Float (−1 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Positive polarity flag (1 if polarity &gt;0, else 0)</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Negative polarity flag (1 if polarity &lt;0, else 0)</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Subjectivity</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>LDA<sup>b</sup> topic distribution (document=tweet)</td>
                    <td>20×float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td colspan="3">
                      <bold>User-level features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Emoji sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Emoji average</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-emoji counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Interjection sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Interjection average</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-interjection counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Profanity sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Profanity average</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-profanity counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Mention sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Mention average</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-mention counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Hashtag sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Hashtag average</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-hashtag counter</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>URL sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>URL average</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-URL flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>First-person sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>First-person average</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio–first-person flag</td>
                    <td>Binary</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Word average</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-number of words</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Character average</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-number of characters</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-polarity</td>
                    <td>Float (−1 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Positive polarity sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Positive polarity average</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Negative polarity sum</td>
                    <td>Integer</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Negative polarity average</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Subjectivity average</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Bio-subjectivity</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>LDA topic distribution (document=all the user’s tweets)</td>
                    <td>20×float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="4">
                      <bold>Social structure features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td colspan="3">
                      <bold>Tweet-level features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>User-level log in-degree</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>User-level log out-degree</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>User-level closeness</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td colspan="3">
                      <bold>User-level features</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Log in-degree</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Log out-degree</td>
                    <td>Float</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>Closeness</td>
                    <td>Float (0 to 1)</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table1fn1">
                  <p><sup>a</sup>IBD: inflammatory bowel disease.</p>
                </fn>
                <fn id="table1fn2">
                  <p><sup>b</sup>LDA: latent Dirichlet allocation.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Behavioral Features</title>
            <p>Features of this type were designed to capture users’ activity on Twitter: How often do they tweet? Do they write new content or mainly RT others? Furthermore, how often do they refer to IBD? We counted the number of tweets and RTs in our data set and calculated the RT ratio for each user. We counted the number of times they used one of our keywords in their tweets to account for the frequency with which they addressed IBD. Aggregated features for user-level classification were also copied to all the users’ tweets to enrich the tweet-level classification.</p>
          </sec>
          <sec>
            <title>Linguistic Features</title>
            <p>The second class of features is derived from the users’ linguistic style on Twitter: Do they write in first-person voice? Do they tend to use emoticons or add a reference to an external source via URL? We used 2 types of linguistic features. On the basis of previous research [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>] and our data’s nature, we extracted several features from the text that we believed would help the classification.</p>
            <p>Acknowledging that individuals and organizations communicate differently on Twitter [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], we searched for specific characteristics that could distinguish private persons from businesses and help identify patients. We checked specific characteristics for each tweet in our data: Was there use of emojis, interjections, or profanities? Was it written in the first person? Did it point to an external source via URL? Did it contain Twitter special characters indicating mentions (@) or hashtags (#)? We used a Python (Python Software Foundation) library called <italic>TextBlob</italic> to add sentiment-related features such as the text’s polarity and subjectivity. The length of the tweets and the number of words they contained were also considered. The Python library <italic>emoji</italic> was used to detect emojis within the text. A part-of-speech identifier from the library <italic>nltk</italic> was used to indicate the use of first person and identify interjections. On the basis of the Python library <italic>profanity</italic>, we established a list of swear words that we searched for in the text. We had to adjust the list to the special domain of IBD as words related to metabolism were not necessarily swear words.</p>
            <p>We started with tweet-level features, which were later grouped by user to represent personal writing style. To reflect the way a user expresses themselves on Twitter, we excluded RTs from the aggregation. The number of tweets in which the URL was used, for example, was counted on the original tweets only. As the users’ biographies were considered as tweets in the tweet-level classifiers, we added the linguistic features that were extracted from the biographies as bio-features in the user-level classifiers.</p>
            <p>In natural language processing, there are several methods to obtain a vector representation of text. One of the more well-known and well-researched techniques is the Bayesian probabilistic model of text documents called latent Dirichlet allocation (LDA). LDA is a topic modeling technique used for discovering the abstract <italic>topics</italic> that occur in a collection of documents [<xref ref-type="bibr" rid="ref53">53</xref>].</p>
            <p>We used LDA to represent text in both tweet- and user-level classification features. In tweet-level features, each tweet was considered a document, and the representations were obtained per tweet. For user-level features, all tweets by the same author were consolidated into 1 document to obtain representations per user. All the features used unigram and bigram representations of the text after data cleaning. The text cleaning process included converting to lower case, removing punctuation and stop words, and normalizing links and other special signs to standard representations.</p>
          </sec>
          <sec>
            <title>Social Structure Features</title>
            <p>The last type of feature we addressed represented the users’ social connections on Twitter. We used the Twitter API to collect each user’s followers and followees. For each user, we kept the number of followers they had (out-degree in the sense of influence) and the number of followees they had (in-degree) and scaled the results using a logarithmic scale. We also computed the closeness centrality measure for each user. Aggregated features for user-level classification were also copied to all the users’ tweets to enrich the tweet-level classification.</p>
          </sec>
        </sec>
        <sec>
          <title>Classification Models</title>
          <p>Aiming to distinguish between patients with IBD and other users who tweet about IBD, we compared the performances of several classification algorithms within 2 classification approaches: the SI learning approach, which starts by classifying tweets separately and then deduces the user’s class from their tweet-level classification, and the metadata-based MI learning approach, which starts by aggregating tweet-level features to user-level features and then classifies the users themselves.</p>
          <p>The metadata-based MI approach starts by transforming the data from MI to SI, and then a standard SI algorithm can be applied to the transformed problem [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. To achieve the users’ characterization for the MI approach, we applied arithmetic sum and average to the tweet-level features and obtained aggregated features per user (refer to the Classification Features section for more details). Note that this process may cause some information loss [<xref ref-type="bibr" rid="ref56">56</xref>].</p>
          <p>For both approaches, we tested 5 standard and well-known algorithms for binary classification tasks such as ours: AdaBoost, gradient boosting classifier, linear support vector machine, logistic regression, and random forest. All the algorithms were applied from the scikit-learn (sklearn) package in Python [<xref ref-type="bibr" rid="ref57">57</xref>].</p>
        </sec>
        <sec>
          <title>Experiment</title>
          <p>We split our data set by users into training and test sets (approximately 80%-20%). The training set had 155 patients and 377 nonpatients, and the test set had 39 patients and 90 nonpatients; thus, the sets maintained the ratio between the groups.</p>
          <p>In the tweet-level classification, the split into training and test sets was performed based on the split of the users—tweets by users belonging to the training set were ascribed to the tweet training set, whereas tweets by users belonging to the test set were ascribed to the tweet test set. As a result, the tweet training set contained 263 positive tweets and 1586 negative tweets, whereas the test set contained 62 positive tweets and 293 negative tweets.</p>
          <p>We started with a hyperparameter optimization for all algorithms using a 5-fold cross-validation over the training data in both approaches. The values tested for each algorithm and parameter can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <p>In total, 4 common metrics were used to evaluate the models: precision, recall, F<sub>1</sub> score, and the area under the receiver operating characteristic curve (ROC AUC). All 4 metrics were calculated over the positive class that was of interest to us. In our setting, precision depicts the probability that a positive prediction is indeed a patient, recall depicts the classifier’s ability to retrieve patients, and the F<sub>1</sub> score combines the 2. ROC AUC considers the recall of both classes and measures the ability of the model to retrieve patients without collecting a lot of unwanted other users.</p>
          <p>To select the best algorithm variant, we used a 10-fold cross-validation technique for a reliable evaluation of the prediction power. In this process, we randomly divided the training set into 10 equal-sized parts; then, we iteratively performed the training on 9 parts and evaluated the model on the part that was left out. We repeated this iteration 10 times, leaving out a different part each time. In addition, we repeated the 10-fold cross-validation process 10 times with different seed initializations to vary the random split. The performance metrics were computed each time, and the results presented in the Results section show the average across these 100 iterations.</p>
          <p>In the user-level classification, we obtained all 4 metrics during the classification process using the sklearn package in Python. However, in the tweet-level classification, another aggregation stage was needed before obtaining the metrics directly from the sklearn package—the process returned the predictions for each tweet (whether it was written by a patient), and we had to infer the users’ predictions by aggregating the predictions given to their tweets. As in the manual annotation process, if all the user’s tweets received a prediction of 0, the user was considered a nonpatient and received a negative prediction. Alternatively, if the user had at least one positive prediction, they were considered a patient and received a positive prediction. We then used the sklearn package to compute the user-level metrics based on the users’ predictions that we obtained and their true labels.</p>
          <p>Finally, we trained the models from each approach (MI and SI) on the entire training set and evaluated their predictions on the test set. We used built-in sklearn methods for feature importance to investigate the contribution of each feature to both logistic regression and random forest algorithms. The absolute value of the coefficient represents the feature importance for logistic regression.</p>
        </sec>
      </sec>
      <sec>
        <title>Analyzing Patients’ Tweets</title>
        <sec>
          <title>A Corpus of Lifestyle-Related Tweets</title>
          <p>The next aim of this study was to obtain a collection of tweets in which patients describe the lifestyle-related treatments they have tried and their symptoms. By filtering and merging different web-based databases [<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref59">59</xref>], we established a list of 420 words that are types of food or physical activities (ie, lifestyle-related words; the full list can be found in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). The Twitter Premium API was used to search for all tweets that mentioned IBD (containing at least one of the 3 keywords described in the Data Collection and Preparation section: <italic>crohn</italic>, <italic>colitis</italic>, and <italic>#IBD</italic>) and at least one of the 420 lifestyle-related words. To build the search query, we used the OR operator within the IBD keywords and the lifestyle-related words and then connected the 2 groups using the AND operator.</p>
          <p>We searched for relevant tweets from January 1, 2019, to September 30, 2019. We excluded RTs and duplicated tweets from the search and limited the search to tweets written in English. The search resulted in 20,136 unique tweets containing new content written by 8519 different users.</p>
          <p>We used the classifier from the first part of the study on the new data we gathered to classify the tweets as either patients’ tweets or other users’ tweets. We needed to recreate the classification features for the new set of 8519 users. As we did in the first stage, we collected another week of tweets for all the users from October 1, 2019, to October 7, 2019, without keyword filtering and including RTs. A total of 39.52% (3367/8519) of the users were private, suspended, or otherwise unavailable. The process resulted in a data set of 5152 users who authored 402,843 tweets overall.</p>
          <p>We constructed all the classification features described in the Classification Features section on the new data except for the closeness centrality. Obtaining this feature was costly and time-consuming as it was the only feature that required collecting all followers and followees for each user and building their Twitter network. As it was not one of the 10 most helpful classification features, we decided to omit it.</p>
          <p>We then used the MI random forest model we trained in the first stage (refer to the Classification Models section for more details) to classify the users and identify patients. A total of 45.79% (2359/5152) of the users were classified as patients, and they authored 4160 of the original tweets containing our keywords. We performed a simple text cleaning of those tweets by removing all screen names (identified by the @ character) and URLs and continued our analysis with the 4160 clean tweets.</p>
        </sec>
        <sec>
          <title>Sentiment Analysis of Lifestyle-Related Words</title>
          <p>The Natural Language Understanding (NLU) module by IBM Cloud [<xref ref-type="bibr" rid="ref60">60</xref>] was used to apply category classification and keyword extraction to each of our tweets. The category classification feature aims to identify the theme of the text. Given a text, the NLU module provides a list of possible categories and subcategories and their corresponding likelihoods. The keyword extraction feature recognizes words and phrases of high importance within the text and calculates their sentiments. Given a text, the NLU module returns a list of keywords and their corresponding sentiments represented as scores on the closed interval of −1 to 1: −1 for extremely negative sentiment and 1 for extremely positive sentiment. A score of 0 means that the keyword was mentioned in a neutral context. The <italic>TextBlob</italic> library used for sentiment analysis in the Linguistic Features section only enables full-text sentiment analysis and does not support entity-level sentiment analysis. Although it was free and easy to use, it did not suit our new task and, therefore, we chose to replace it with the NLU module.</p>
          <p>The goal was to identify the lifestyle-related treatments that patients undergo to manage their disease and determine their sentiments toward them. Hence, we focused our analysis on keywords related to health and nutrition. We grouped all tweets that were categorized by the NLU module as related to <italic>health and fitness</italic> (2080 tweets), <italic>food and drink</italic> (1568 tweets), or <italic>religion and spirituality</italic> (15 tweets). Overall, 3663 tweets were selected for keyword sentiment analysis. We gathered all the keywords that appeared in our predefined list of lifestyle-related words and their corresponding sentiments within each tweet. In total, 3 examples of this process are presented in <xref ref-type="table" rid="table2">Table 2</xref>. Notice how, in the second example, the first word of the original tweet (marked with the @ symbol) is a screen name and was therefore removed in the cleaning process.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Three examples of category classification and keyword sentiment extraction after text cleaning.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="110"/>
              <col width="300"/>
              <col width="260"/>
              <col width="180"/>
              <col width="150"/>
              <thead>
                <tr valign="top">
                  <td>Number</td>
                  <td>Original text</td>
                  <td>Text after cleaning</td>
                  <td>Category classification</td>
                  <td>Keyword sentiment</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>1</td>
                  <td>Spinach is an inflammatory food with a lot of sulfur. Ban that too. (I noticed my Crohn’s tended to flare around spinach season.)</td>
                  <td>Spinach is an inflammatory food with a lot of sulfur. Ban that too. (I noticed my Crohn’s tended to flare around spinach season.)</td>
                  <td>Food and drink</td>
                  <td>Spinach: −0.63</td>
                </tr>
                <tr valign="top">
                  <td>2</td>
                  <td>@bottomline_ibd great poll. I do have the odd binge, but IBD has changed what I can drink. No more red wine or ale <inline-graphic xlink:href="jmir_v24i8e29186_fig4.png" xlink:type="simple" mimetype="image"/></td>
                  <td>great poll. I do have the odd binge, but IBD has changed what I can drink. No more red wine or ale <inline-graphic xlink:href="jmir_v24i8e29186_fig4.png" xlink:type="simple" mimetype="image"/></td>
                  <td>Food and drink</td>
                  <td>Red wine: −0.83; ale: −0.83</td>
                </tr>
                <tr valign="top">
                  <td>3</td>
                  <td>I am living proof that yoga can help #uchicagoibd #studiothree #yoga #ibd</td>
                  <td>I am living proof that yoga can help #uchicagoibd #studiothree #yoga #ibd</td>
                  <td>Religion and spirituality</td>
                  <td>Yoga: 0.69</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>To examine the effectiveness of each lifestyle-related phrase (lifestyle, in short) and to assess its overall sentiment, we aggregated the results by lifestyle and calculated the following statistics: the total number of times the lifestyle appeared in all tweets, the number of times it appeared in a positive (or negative) context, the positive-to-negative ratio of the number of appearances (odds), and the mean sentiment of the lifestyle.</p>
          <p>We used the statistics to build a co-occurrence network that visualized the connections between lifestyles and their mean sentiments. The different lifestyles were the nodes, and an arc connected 2 lifestyles if they appeared in the same tweet. The more times they appeared together, the stronger the connection between the lifestyles was. Therefore, the resulting network was undirected and weighted by the number of times the lifestyles co-occurred. The purpose was to identify helpful lifestyles (frequently mentioned in a positive context) and lifestyles that it is better to avoid (frequently mentioned in a negative context) and examine whether certain lifestyles tend to be implemented together.</p>
          <p>The network was obtained using <italic>Gephi</italic> software (GNU General Public License) for network analysis and visualization. Each node was colored on a scale from green to red based on the mean sentiment of the lifestyle it represented, with green being very positive and red being very negative. The sizing of the nodes reflected the number of times the lifestyles were mentioned in the tweet database: the more times they appeared, the larger their nodes were. The thickness of each arc represented the number of times the 2 lifestyles it connected co-occurred: the thicker the arc, the more times the 2 lifestyles appeared together. To avoid obtaining an overdense network, we only considered the nodes of lifestyles mentioned at least five times in our database. We included arcs between lifestyles that co-occurred at least four times. The process resulted in 144 lifestyles presented in the network and sorted in a table by mean sentiment.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Note</title>
        <p>The collection and analysis of Twitter data may entail ethical challenges that should be addressed and handled properly. Twitter data are public and available for research via Twitter APIs. By accepting Twitter’s Terms of Service and Privacy Policy, Twitter users acknowledge that their tweets can be viewed instantly worldwide and that their information may be collected by third parties [<xref ref-type="bibr" rid="ref61">61</xref>]. Nonetheless, social media studies have revealed that users on Twitter feel as if they are engaged in a private conversation with their followees and followers [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>]. Although they are generally not concerned with their posts being used for research purposes, they expect anonymity in publication and to be asked for their consent before publication.</p>
        <p>Obtaining informed consent from all the users who <italic>participate</italic> in research on Twitter data may be unfeasible. Data sets are likely to be large and involve many authors [<xref ref-type="bibr" rid="ref61">61</xref>-<xref ref-type="bibr" rid="ref63">63</xref>]. Individually seeking consent from all 722 users in our study would be labor-intensive or impossible as some might be unreachable. Moreover, providing total anonymity to users while directly quoting their content is not practical; tweets are easily searchable, leaving their authors vulnerable to identification.</p>
        <p>To adhere to ethical norms and maintain user privacy, we only published aggregated results that do not reveal the specific users. The 3 examples containing direct quotes from tweets (in <xref ref-type="table" rid="table2">Table 2</xref>) are presented in this study after obtaining informed consent from their authors.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Patient Identification</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the 10-fold cross-validation and test results for the 2 classification approaches: SI classifying tweets and MI classifying users. The table shows the results of the 4 metrics for all 5 classification algorithms.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>The 10-fold cross-validation and test results for the single instance (SI) and multiple instance (MI) classifications.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="260"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Algorithm and metric</td>
                <td colspan="5">SI tweet-level classification</td>
                <td colspan="3">MI user-level classification</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">10-fold</td>
                <td colspan="2">Test</td>
                <td colspan="3">10-fold</td>
                <td>Test</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="11">
                  <bold>AdaBoost</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td colspan="2">0.6775</td>
                <td colspan="2">0.7241</td>
                <td colspan="3">0.6151</td>
                <td colspan="2">0.5902</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Recall</td>
                <td colspan="2">0.6297</td>
                <td colspan="2">0.5385</td>
                <td colspan="3">0.7284</td>
                <td colspan="2">0.9231</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">0.6525</td>
                <td colspan="2">0.6176</td>
                <td colspan="3">0.6542</td>
                <td colspan="2">0.7200</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ROC AUC<sup>a</sup></td>
                <td colspan="2">0.7532</td>
                <td colspan="2">0.7248</td>
                <td colspan="3">0.8469</td>
                <td colspan="2">0.8226</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Gradient boosting classifier</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td colspan="2">0.7416</td>
                <td colspan="2">0.6471</td>
                <td colspan="3">0.6668</td>
                <td colspan="2">0.6735</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Recall</td>
                <td colspan="2">0.6465</td>
                <td colspan="2">0.5641</td>
                <td colspan="3">0.6778</td>
                <td colspan="2">0.8462</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">0.6906</td>
                <td colspan="2">0.6027</td>
                <td colspan="3">0.6711</td>
                <td colspan="2">0.7500</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ROC AUC</td>
                <td colspan="2">0.7768</td>
                <td colspan="2">0.7154</td>
                <td colspan="3">0.8658</td>
                <td colspan="2">0.8342</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Linear SVM<sup>b</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td colspan="2">0.7249</td>
                <td colspan="2">0.6667</td>
                <td colspan="3">0.6648</td>
                <td colspan="2">0.5814</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Recall</td>
                <td colspan="2">0.6832</td>
                <td colspan="2">0.7179</td>
                <td colspan="3">0.6398</td>
                <td colspan="2">0.6410</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">0.7034</td>
                <td colspan="2">0.6914</td>
                <td colspan="3">0.6472</td>
                <td colspan="2">0.6098</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ROC AUC</td>
                <td colspan="2">0.7883</td>
                <td colspan="2">0.7812</td>
                <td colspan="3">0.8463</td>
                <td colspan="2">0.7205</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Logistic regression</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td colspan="2">0.7405</td>
                <td colspan="2">0.6333</td>
                <td colspan="3">0.6594</td>
                <td colspan="2">0.6250</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Recall</td>
                <td colspan="2">0.6335</td>
                <td colspan="2">0.4872</td>
                <td colspan="3">0.6358</td>
                <td colspan="2">0.6410</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">0.6829</td>
                <td colspan="2">0.5507</td>
                <td colspan="3">0.6423</td>
                <td colspan="2">0.6329</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ROC AUC</td>
                <td colspan="2">0.7712</td>
                <td colspan="2">0.6825</td>
                <td colspan="3">0.8473</td>
                <td colspan="2">0.7372</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Random forest</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td colspan="2">0.7676</td>
                <td colspan="2">0.7333</td>
                <td colspan="3">0.6721</td>
                <td colspan="2">0.6444</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Recall</td>
                <td colspan="2">0.4355</td>
                <td colspan="2">0.2821</td>
                <td colspan="3">0.6646</td>
                <td colspan="2">0.7436</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">0.5555</td>
                <td colspan="2">0.4074</td>
                <td colspan="3">0.6595</td>
                <td colspan="2">0.6905</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ROC AUC</td>
                <td colspan="2">0.6906</td>
                <td colspan="2">0.6188</td>
                <td colspan="3">0.8722</td>
                <td colspan="2">0.7829</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>ROC AUC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>SVM: support vector machine.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Both approaches showed satisfactory classification results for the patient class. Although the precision rates were slightly higher for the SI approach, the recall index of the MI approach was better, and the results for the ROC AUC measure were consistently higher in the MI approach. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the differences among the 4 measures within the test set results.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Test result comparison between the 2 classification approaches. MI: multiple instance; ROC AUC: area under the receiver operating characteristic curve; SI: single instance; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="jmir_v24i8e29186_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Investigating the contribution of each feature to both the logistic regression and random forest algorithms showed the importance of the use of first-person voice. In both classification approaches and algorithms, the most important feature was the use of the first person, which had a significant advantage over the other features. The first-person flag was the best feature of the SI approach, and its average was the best feature of the MI approach. Another dominant feature was the use of profanities as it was one of the most significant features in both approaches and algorithms.</p>
        <p>The analysis also highlighted the importance of the LDA features derived from the text. The second-best feature of the SI approach was LDA topic 11 for both the logistic regression and random forest algorithms. This was the only topic that did not contain IBD-related words. The fourth and fifth most important topics of the MI approach were identical for both algorithms—LDA topics 17 and 9, respectively. The LDA topics that were created over the training data for each approach can be found in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
      </sec>
      <sec>
        <title>Analyzing Patients’ Tweets</title>
        <p>In the second stage of the study, a network of connections between lifestyles was built and visualized. The obtained network describing the relationships between the different lifestyles can be found in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
        <p>The most frequent word in our database was <italic>diet</italic>, encapsulating all the nutritional adjustments that patients undergo to manage their disease. Specific diets such as <italic>paleo</italic>, <italic>vegetarian</italic>, or <italic>liquid</italic> diets also surfaced, and they appeared in a negative context.</p>
        <p>It is interesting to note that the negative and positive lifestyles revealed by the analysis were in line with what is known about suitable nutrition for IBD. Among the most negative lifestyles (by mean sentiment), we found <italic>alcohol</italic>, <italic>milk</italic>, <italic>spicy</italic>, <italic>cabbage</italic>, <italic>flour</italic>, <italic>lentil</italic>, and <italic>orange juice</italic>, all known to cause inflammation and irritate the stomach. Among the most positive lifestyles (by mean sentiment), we found activity-related lifestyles such as <italic>fitness</italic> or <italic>yoga</italic> and healing foods such as <italic>salmon</italic>, <italic>ginger</italic>, and <italic>garlic</italic>. The most positive lifestyle turned out to be <italic>sushi</italic>, which usually contains anti-inflammatory ingredients such as <italic>salmon</italic> or <italic>tuna</italic>, <italic>seaweed</italic>, and <italic>rice</italic>. <xref ref-type="table" rid="table4">Table 4</xref> presents the 20 most positive and 20 most negative lifestyle-related words sorted by mean sentiment.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The 20 most positive and 20 most negative lifestyles sorted by mean sentiment.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="160"/>
            <col width="110"/>
            <col width="220"/>
            <col width="160"/>
            <col width="160"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td>Rank</td>
                <td>Keyword</td>
                <td>Count</td>
                <td>Sentiment, mean (SD)</td>
                <td>Count of positive</td>
                <td>Count of negative</td>
                <td>Odds</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Sushi</td>
                <td>9</td>
                <td>0.466 (0.814)</td>
                <td>7</td>
                <td>2</td>
                <td>3.500</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Ginger ale</td>
                <td>5</td>
                <td>0.407 (0.597)</td>
                <td>3</td>
                <td>1</td>
                <td>3.000</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Salmon</td>
                <td>7</td>
                <td>0.344 (0.691)</td>
                <td>4</td>
                <td>3</td>
                <td>1.333</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Cherry</td>
                <td>10</td>
                <td>0.33 (0.696)</td>
                <td>6</td>
                <td>2</td>
                <td>3.000</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Breakfast</td>
                <td>29</td>
                <td>0.28 (0.75)</td>
                <td>19</td>
                <td>9</td>
                <td>2.111</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Garlic</td>
                <td>8</td>
                <td>0.244 (0.671)</td>
                <td>4</td>
                <td>2</td>
                <td>2.000</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Bagel</td>
                <td>5</td>
                <td>0.224 (0.633)</td>
                <td>3</td>
                <td>1</td>
                <td>3.000</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Almond</td>
                <td>9</td>
                <td>0.193 (0.668)</td>
                <td>6</td>
                <td>3</td>
                <td>2.000</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Yogurt</td>
                <td>14</td>
                <td>0.189 (0.688)</td>
                <td>7</td>
                <td>3</td>
                <td>2.333</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Yoga</td>
                <td>15</td>
                <td>0.186 (0.693)</td>
                <td>7</td>
                <td>5</td>
                <td>1.400</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>Ham</td>
                <td>5</td>
                <td>0.184 (0.535)</td>
                <td>2</td>
                <td>1</td>
                <td>2.000</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>Biscuit</td>
                <td>13</td>
                <td>0.172 (0.75)</td>
                <td>8</td>
                <td>5</td>
                <td>1.600</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>Spinach</td>
                <td>6</td>
                <td>0.171 (0.76)</td>
                <td>4</td>
                <td>2</td>
                <td>2.000</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>Vegan cheese</td>
                <td>5</td>
                <td>0.164 (0.92)</td>
                <td>3</td>
                <td>2</td>
                <td>1.500</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>Lamb</td>
                <td>5</td>
                <td>0.14 (0.861)</td>
                <td>3</td>
                <td>2</td>
                <td>1.500</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>Cake</td>
                <td>26</td>
                <td>0.13 (0.752)</td>
                <td>16</td>
                <td>9</td>
                <td>1.778</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>Fitness</td>
                <td>19</td>
                <td>0.114 (0.728)</td>
                <td>9</td>
                <td>6</td>
                <td>1.500</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>Ginger</td>
                <td>17</td>
                <td>0.112 (0.724)</td>
                <td>8</td>
                <td>7</td>
                <td>1.143</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>Tomato</td>
                <td>10</td>
                <td>0.089 (0.608)</td>
                <td>5</td>
                <td>3</td>
                <td>1.667</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>Cafe</td>
                <td>7</td>
                <td>0.081 (0.783)</td>
                <td>3</td>
                <td>3</td>
                <td>1.000</td>
              </tr>
              <tr valign="top">
                <td>125</td>
                <td>Fodmap</td>
                <td>12</td>
                <td>−0.501 (0.573)</td>
                <td>2</td>
                <td>9</td>
                <td>0.222</td>
              </tr>
              <tr valign="top">
                <td>126</td>
                <td>Cocktail</td>
                <td>5</td>
                <td>−0.51 (0.769)</td>
                <td>1</td>
                <td>4</td>
                <td>0.250</td>
              </tr>
              <tr valign="top">
                <td>127</td>
                <td>Fiber</td>
                <td>63</td>
                <td>−0.512 (0.547)</td>
                <td>7</td>
                <td>47</td>
                <td>0.149</td>
              </tr>
              <tr valign="top">
                <td>128</td>
                <td>Spicy</td>
                <td>37</td>
                <td>−0.514 (0.572)</td>
                <td>7</td>
                <td>28</td>
                <td>0.250</td>
              </tr>
              <tr valign="top">
                <td>129</td>
                <td>Vegetable</td>
                <td>49</td>
                <td>−0.533 (0.529)</td>
                <td>6</td>
                <td>39</td>
                <td>0.154</td>
              </tr>
              <tr valign="top">
                <td>130</td>
                <td>Corn</td>
                <td>28</td>
                <td>−0.534 (0.487)</td>
                <td>2</td>
                <td>22</td>
                <td>0.091</td>
              </tr>
              <tr valign="top">
                <td>131</td>
                <td>Alcohol</td>
                <td>64</td>
                <td>−0.545 (0.545)</td>
                <td>9</td>
                <td>51</td>
                <td>0.176</td>
              </tr>
              <tr valign="top">
                <td>132</td>
                <td>Milkshake</td>
                <td>5</td>
                <td>−0.556 (0.811)</td>
                <td>1</td>
                <td>4</td>
                <td>0.250</td>
              </tr>
              <tr valign="top">
                <td>133</td>
                <td>Milk</td>
                <td>44</td>
                <td>−0.565 (0.5)</td>
                <td>4</td>
                <td>35</td>
                <td>0.114</td>
              </tr>
              <tr valign="top">
                <td>134</td>
                <td>Vegetarian diet</td>
                <td>10</td>
                <td>−0.567 (0.409)</td>
                <td>1</td>
                <td>8</td>
                <td>0.125</td>
              </tr>
              <tr valign="top">
                <td>135</td>
                <td>Snack</td>
                <td>10</td>
                <td>−0.573 (0.568)</td>
                <td>2</td>
                <td>8</td>
                <td>0.250</td>
              </tr>
              <tr valign="top">
                <td>136</td>
                <td>Fig</td>
                <td>5</td>
                <td>−0.578 (0.621)</td>
                <td>1</td>
                <td>4</td>
                <td>0.250</td>
              </tr>
              <tr valign="top">
                <td>137</td>
                <td>Turkey</td>
                <td>10</td>
                <td>−0.608 (0.626)</td>
                <td>2</td>
                <td>8</td>
                <td>0.250</td>
              </tr>
              <tr valign="top">
                <td>138</td>
                <td>Yeast</td>
                <td>16</td>
                <td>−0.624 (0.391)</td>
                <td>1</td>
                <td>13</td>
                <td>0.077</td>
              </tr>
              <tr valign="top">
                <td>139</td>
                <td>Orange</td>
                <td>7</td>
                <td>−0.638 (0.449)</td>
                <td>0</td>
                <td>5</td>
                <td>0.000</td>
              </tr>
              <tr valign="top">
                <td>140</td>
                <td>Beverage</td>
                <td>7</td>
                <td>−0.661 (0.616)</td>
                <td>1</td>
                <td>6</td>
                <td>0.167</td>
              </tr>
              <tr valign="top">
                <td>141</td>
                <td>Cabbage</td>
                <td>8</td>
                <td>−0.675 (0.19)</td>
                <td>0</td>
                <td>8</td>
                <td>0.000</td>
              </tr>
              <tr valign="top">
                <td>142</td>
                <td>Orange juice</td>
                <td>5</td>
                <td>−0.682 (0.385)</td>
                <td>0</td>
                <td>4</td>
                <td>0.000</td>
              </tr>
              <tr valign="top">
                <td>143</td>
                <td>Flour</td>
                <td>6</td>
                <td>−0.785 (0.211)</td>
                <td>0</td>
                <td>6</td>
                <td>0.000</td>
              </tr>
              <tr valign="top">
                <td>144</td>
                <td>Lentil</td>
                <td>6</td>
                <td>−0.785 (0.188)</td>
                <td>0</td>
                <td>6</td>
                <td>0.000</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study presents a workflow for identifying patients with IBD on Twitter and exploring their tweets. The aim was to identify patients with IBD based on the way they communicate on Twitter and to learn from the personal experiences they share.</p>
        <p>In the first stage of the study, a classifier of Twitter users designed to distinguish patients with IBD from other users was constructed and evaluated. Classification features combining social data and text analysis were extracted from the users’ activity on Twitter, their social connections, and the content of their tweets. Various classification algorithms were considered, and 4 evaluation measures were calculated for each of them. The encouraging results shown in the previous section helped convince us that patients with IBD can be identified on Twitter based on such features.</p>
        <p>Classification results from both the SI and MI approaches show that patients with IBD differ in the way they communicate on Twitter from other users who tweet about the disease. They talk in the first person more often and use more profanities in their tweets. These gaps, which can be explained by the fact that patients are private individuals whereas nonpatients also include organizations and voluntary associations that communicate in a much more formal manner, helped distinguish patients from other entities in the different classification models we tried in this study.</p>
        <p>Our analysis differs from previous research regarding user classification on Twitter [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>] in 2 aspects. Conceptually, we investigate a different domain and try to identify patients on Twitter. Practically, we compare the results from the user-level classification with a tweet-level classification.</p>
        <p>In the second stage of the study, tweets of patients with IBD were collected to investigate the different lifestyles they implemented to deal with their disease and assess these lifestyles’ effectiveness. Unlike previous research on patients’ sentiments on Twitter [<xref ref-type="bibr" rid="ref48">48</xref>-<xref ref-type="bibr" rid="ref50">50</xref>], we focused our research on entity sentiment for specific words rather than the entire tweet’s sentiment. We suggested a novel approach by considering entity sentiment analysis to obtain patients’ sentiments toward the different nutrition and fitness-based solutions they try. These findings were in line with what is known about IBD, as several foods known to cause inflammation were mentioned with negative sentiment, whereas relaxing activities and anti-inflammatory foods surfaced in a positive context.</p>
        <p>This study suggests that there is room for collaboration between physicians and engineers regarding understanding chronic diseases. Owing to the chronic nature of the disease and the fact that it involves bowel movements, patients with IBD are compelled to follow special nutrition and maintain a calm routine. By collecting and analyzing patients’ personal experiences on social media, we can monitor patients’ lifestyles and support medical knowledge of IBD. We can identify and assess complementary treatments to diets and physical activity and maybe ease patients’ processes of finding the right treatments for them. Although such analysis should not strive to replace physicians or draw conclusions of a clinical nature, it may provide complementary recommendations for healthy lifestyles based on the wisdom of the crowd.</p>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <sec>
          <title>Overview</title>
          <p>The focus of this study was on showing the potential of identifying patients with IBD on Twitter and learning from their tweets. This study emphasized the entire process, and we did not perfect each part separately. As this section explains, each part can be improved by trying different methods and enriching the analysis.</p>
        </sec>
        <sec>
          <title>Patient Identification</title>
          <p>The classifier developed in the first stage of this study uses 1-level, binary classification to separate patients with IBD from other users who tweet about the disease. Some of its features, such as the use of the first person in the tweet, distinguish organizations from individuals in general and do not necessarily detect patients. Therefore, our nonpatient class is heterogeneous and somewhat ambiguous, containing both organizations that significantly differ from patients in their communication patterns and healthy individuals who differ from patients in a more refined manner. Even during the manual labeling process, all 14 users excluded from the data set owing to classification disagreements were individuals talking in the first-person voice.</p>
          <p>A possible direction for future work would be to try a 2-step classification: separating persons from organizations and continuing by searching for patients among these individuals. It can improve the robustness of some of the features by overcoming the heterogeneity of the nonpatient class in our model. Alternatively, we could try replacing the binary classification with a multinomial one that will capture not only organizations and patients but also individuals who talk about the disease and maybe mention other patients but are not sick themselves.</p>
          <p>During the construction of the network-based features, we only collected immediate connections on Twitter (ie, the followers and followees of each patient). The sampling method resulted in basic network features that mainly included degree measures. We encourage future research to consider more interesting network features such as other centrality measures or structures. Such enhancement will require collecting at least one more level of connections (eg, followees of followees) to understand network patterns better.</p>
          <p>Finally, the classifier uses standard classification algorithms, and we did not try current state-of-the-art learning techniques based on neural networks. Text representation using word embeddings, where words are mapped to vectors of real numbers in a predefined vector space [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>], is also worth examining.</p>
        </sec>
        <sec>
          <title>Analyzing Patients’ Tweets</title>
          <p>The NLU module by IBM Cloud was used in this study for entity sentiment analysis as a proof of concept. We did not evaluate its results or compare them with similar tools available in the market, such as the Natural Language API by Google Cloud. Future research should consider performing similar analyses with different natural language processing tools and comparing their results. Even training designated algorithms on data from lifestyle-related tweets such as those used in this study can benefit the analysis.</p>
          <p>Overall, the results for the second part are preliminary, and much more can be done to understand what patients with IBD are talking about on Twitter. For example, by characterizing treatment options and patients’ sentiments toward them, one can derive recommendations for a healthy lifestyle based on the wisdom of the crowd. Thoroughly exploring outliers, such as the 4 positive mentions of milk as opposed to the 35 negative ones, can reveal new information regarding the disease that has not yet been covered in the literature.</p>
        </sec>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In the era of personalized medicine and patient-centered care, it is important to derive insights that reflect the patients’ perspectives as manifested in social media. Although the time between physician appointments can be lengthy, messages on social media are being posted each day, and patients constantly use them to exchange inputs and recommendations.</p>
        <p>This study provides a potential pipeline for identifying patients with chronic illnesses on Twitter and collecting their tweets to analyze the experiential knowledge they share on the web. The method presented in this study was applied to IBD and can also help explore other medical conditions. The classifier for IBD-related entities can be adapted to identify other patients with chronic illnesses. The analysis of patients’ tweets can benefit research on other chronic conditions with similar characteristics. With conditions such as celiac disease or diabetes, which involve strict dietary guidelines, one can better understand patients’ difficulties with adherence to their new lifestyles. When considering diseases that cause embarrassment, such as HIV, one can learn more about the constant struggle of patients living with the disease.</p>
        <p>Therefore, the contribution of this study is 2-fold: it provides an analytical contribution to the fields of text mining and social media and a practical contribution by better understanding chronic conditions and promoting a healthy lifestyle for patients with chronic illnesses.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Parameter optimization for classification algorithms.</p>
        <media xlink:href="jmir_v24i8e29186_app1.docx" xlink:title="DOCX File, 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>A list of 420 lifestyle-related words.</p>
        <media xlink:href="jmir_v24i8e29186_app2.docx" xlink:title="DOCX File, 39 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Latent Dirichlet allocation topics created over the training data for each classification approach.</p>
        <media xlink:href="jmir_v24i8e29186_app3.docx" xlink:title="DOCX File, 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>A network of relationships between lifestyle-related words.</p>
        <media xlink:href="jmir_v24i8e29186_app4.pdf" xlink:title="PDF File (Adobe PDF File), 148 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">IBD</term>
          <def>
            <p>inflammatory bowel disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">MI</term>
          <def>
            <p>multiple instance</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLU</term>
          <def>
            <p>Natural Language Understanding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ROC AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RT</term>
          <def>
            <p>retweet</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SI</term>
          <def>
            <p>single instance</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by a grant from the European Research Area Network Cofund Healthy Diet for a Healthy Life-Intestinal Microbiomics under the Joint Programming Initiative <italic>A healthy diet for a healthy life</italic> umbrella.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Fabbri</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenbloom</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A scalable framework to detect personal health mentions on Twitter</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>06</month>
          <day>05</day>
          <volume>17</volume>
          <issue>6</issue>
          <fpage>e138</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/6/e138/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.4305</pub-id>
          <pub-id pub-id-type="medline">26048075</pub-id>
          <pub-id pub-id-type="pii">v17i6e138</pub-id>
          <pub-id pub-id-type="pmcid">PMC4526910</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karisani</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Agichtein</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Did you really just have a heart attack? Towards robust detection of personal health mentions in social media</article-title>
          <source>Proceedings of the 2018 World Wide Web Conference</source>
          <year>2018</year>
          <conf-name>WWW '18</conf-name>
          <conf-date>April 23-27, 2018</conf-date>
          <conf-loc>Lyon, France</conf-loc>
          <fpage>137</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1145/3178876.3186055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Tracking social media discourse about the COVID-19 pandemic: development of a public coronavirus Twitter data set</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>05</month>
          <day>29</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19273</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e19273/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19273</pub-id>
          <pub-id pub-id-type="medline">32427106</pub-id>
          <pub-id pub-id-type="pii">v6i2e19273</pub-id>
          <pub-id pub-id-type="pmcid">PMC7265654</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jahanbin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rahmanian</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter and Web news mining to predict COVID-19 outbreak</article-title>
          <source>Asian Pac J Trop Med</source>
          <year>2020</year>
          <volume>13</volume>
          <issue>8</issue>
          <fpage>378</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.4103/1995-7645.279651</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lopreite</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Panzarasa</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Puliga</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Riccaboni</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Early warnings of COVID-19 outbreaks across Europe from social media</article-title>
          <source>Sci Rep</source>
          <year>2021</year>
          <month>01</month>
          <day>25</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>2147</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-021-81333-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-021-81333-1</pub-id>
          <pub-id pub-id-type="medline">33495534</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-021-81333-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7835375</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gabarron</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Dorronzoro</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rivera-Romero</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Wynn</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Diabetes on Twitter: a sentiment analysis</article-title>
          <source>J Diabetes Sci Technol</source>
          <year>2019</year>
          <month>05</month>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>439</fpage>
          <lpage>44</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30453762"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1932296818811679</pub-id>
          <pub-id pub-id-type="medline">30453762</pub-id>
          <pub-id pub-id-type="pmcid">PMC6501536</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kar</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A new dawn: the role of social media in diabetes education</article-title>
          <source>J Diabetes Nurs</source>
          <year>2014</year>
          <month>1</month>
          <volume>18</volume>
          <issue>2</issue>
          <fpage>68</fpage>
          <lpage>71</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beguerisse-Díaz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McLennan</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Garduño-Hernández</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Barahona</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ulijaszek</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>The 'who' and 'what' of #diabetes on Twitter</article-title>
          <source>Digit Health</source>
          <year>2017</year>
          <month>01</month>
          <day>01</day>
          <volume>3</volume>
          <fpage>2055207616688841</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/2055207616688841?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/2055207616688841</pub-id>
          <pub-id pub-id-type="medline">29942579</pub-id>
          <pub-id pub-id-type="pii">10.1177_2055207616688841</pub-id>
          <pub-id pub-id-type="pmcid">PMC6001201</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sugawara</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Narimatsu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hozawa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Otani</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Fukao</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Cancer patients on Twitter: a novel patient community on social media</article-title>
          <source>BMC Res Notes</source>
          <year>2012</year>
          <month>12</month>
          <day>27</day>
          <volume>5</volume>
          <fpage>699</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcresnotes.biomedcentral.com/articles/10.1186/1756-0500-5-699"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1756-0500-5-699</pub-id>
          <pub-id pub-id-type="medline">23270426</pub-id>
          <pub-id pub-id-type="pii">1756-0500-5-699</pub-id>
          <pub-id pub-id-type="pmcid">PMC3599295</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tsuya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sugawara</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Narimatsu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Do cancer patients tweet? Examining the Twitter use of cancer patients in Japan</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>05</month>
          <day>27</day>
          <volume>16</volume>
          <issue>5</issue>
          <fpage>e137</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2014/5/e137/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3298</pub-id>
          <pub-id pub-id-type="medline">24867458</pub-id>
          <pub-id pub-id-type="pii">v16i5e137</pub-id>
          <pub-id pub-id-type="pmcid">PMC4060148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>The global burden of IBD: from 2015 to 2025</article-title>
          <source>Nat Rev Gastroenterol Hepatol</source>
          <year>2015</year>
          <month>12</month>
          <volume>12</volume>
          <issue>12</issue>
          <fpage>720</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1038/nrgastro.2015.150</pub-id>
          <pub-id pub-id-type="medline">26323879</pub-id>
          <pub-id pub-id-type="pii">nrgastro.2015.150</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loftus</surname>
              <given-names>EV</given-names>
              <suffix>Jr</suffix>
            </name>
          </person-group>
          <article-title>Clinical epidemiology of inflammatory bowel disease: incidence, prevalence, and environmental influences</article-title>
          <source>Gastroenterology</source>
          <year>2004</year>
          <month>05</month>
          <volume>126</volume>
          <issue>6</issue>
          <fpage>1504</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1053/j.gastro.2004.01.063</pub-id>
          <pub-id pub-id-type="medline">15168363</pub-id>
          <pub-id pub-id-type="pii">S0016508504004627</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roccetti</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Marfia</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Salomoni</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Prandi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zagari</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Gningaye Kengni</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Bazzoli</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Montagnani</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Attitudes of Crohn's disease patients: infodemiology case study and sentiment analysis of Facebook and Twitter posts</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2017</year>
          <month>08</month>
          <day>09</day>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>e51</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2017/3/e51/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.7004</pub-id>
          <pub-id pub-id-type="medline">28793981</pub-id>
          <pub-id pub-id-type="pii">v3i3e51</pub-id>
          <pub-id pub-id-type="pmcid">PMC5569247</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Trivedi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Keefer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>The emerging adult with inflammatory bowel disease: challenges and recommendations for the adult gastroenterologist</article-title>
          <source>Gastroenterol Res Pract</source>
          <year>2015</year>
          <volume>2015</volume>
          <fpage>260807</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2015/260807"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2015/260807</pub-id>
          <pub-id pub-id-type="medline">26064089</pub-id>
          <pub-id pub-id-type="pmcid">PMC4434201</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Norton</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lomax</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley-Brown</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Patient perspectives on the impact of Crohn's disease: results from group interviews</article-title>
          <source>Patient Prefer Adherence</source>
          <year>2012</year>
          <volume>6</volume>
          <fpage>509</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.2147/PPA.S32690"/>
          </comment>
          <pub-id pub-id-type="doi">10.2147/PPA.S32690</pub-id>
          <pub-id pub-id-type="medline">22879737</pub-id>
          <pub-id pub-id-type="pii">ppa-6-509</pub-id>
          <pub-id pub-id-type="pmcid">PMC3413071</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Dubinsky</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Panaccione</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Binion</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Hopper</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The impact of ulcerative colitis on patients' lives compared to other chronic diseases: a patient survey</article-title>
          <source>Dig Dis Sci</source>
          <year>2010</year>
          <month>04</month>
          <volume>55</volume>
          <issue>4</issue>
          <fpage>1044</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1007/s10620-009-0953-7</pub-id>
          <pub-id pub-id-type="medline">20155319</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brydolf</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Segesten</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Living with ulcerative colitis: experiences of adolescents and young adults</article-title>
          <source>J Adv Nurs</source>
          <year>1996</year>
          <month>01</month>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>39</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1365-2648.1996.tb03133.x</pub-id>
          <pub-id pub-id-type="medline">8708222</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Beusterien</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cheifetz</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Moss</surname>
              <given-names>AC</given-names>
            </name>
          </person-group>
          <article-title>The burden of inflammatory bowel disease: a patient-reported qualitative analysis and development of a conceptual model</article-title>
          <source>Inflamm Bowel Dis</source>
          <year>2014</year>
          <month>03</month>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>545</fpage>
          <lpage>52</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24407484"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/01.MIB.0000440983.86659.81</pub-id>
          <pub-id pub-id-type="medline">24407484</pub-id>
          <pub-id pub-id-type="pmcid">PMC3932523</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Dougall</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hungin</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Neely</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The fight for 'health-related normality': a qualitative study of the experiences of individuals living with established inflammatory bowel disease (IBD)</article-title>
          <source>J Health Psychol</source>
          <year>2005</year>
          <month>05</month>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>443</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1177/1359105305051433</pub-id>
          <pub-id pub-id-type="medline">15857873</pub-id>
          <pub-id pub-id-type="pii">10/3/443</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frohlich</surname>
              <given-names>DO</given-names>
            </name>
          </person-group>
          <article-title>The social construction of inflammatory bowel disease using social media technologies</article-title>
          <source>Health Commun</source>
          <year>2016</year>
          <month>11</month>
          <volume>31</volume>
          <issue>11</issue>
          <fpage>1412</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1080/10410236.2015.1077690</pub-id>
          <pub-id pub-id-type="medline">27050670</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kemp</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Griffiths</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lovell</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Understanding the health and social care needs of people living with IBD: a meta-synthesis of the evidence</article-title>
          <source>World J Gastroenterol</source>
          <year>2012</year>
          <month>11</month>
          <day>21</day>
          <volume>18</volume>
          <issue>43</issue>
          <fpage>6240</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.wjgnet.com/1007-9327/full/v18/i43/6240.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.3748/wjg.v18.i43.6240</pub-id>
          <pub-id pub-id-type="medline">23180944</pub-id>
          <pub-id pub-id-type="pmcid">PMC3501772</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Becker</surname>
              <given-names>KL</given-names>
            </name>
          </person-group>
          <article-title>Cyberhugs: creating a voice for chronic pain sufferers through technology</article-title>
          <source>Cyberpsychol Behav Soc Netw</source>
          <year>2013</year>
          <month>02</month>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>123</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1089/cyber.2012.0361</pub-id>
          <pub-id pub-id-type="medline">23276258</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wiese</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kelley</surname>
              <given-names>PG</given-names>
            </name>
            <name name-style="western">
              <surname>Cranor</surname>
              <given-names>LF</given-names>
            </name>
            <name name-style="western">
              <surname>Dabbish</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Are you close with me? Are you nearby?: investigating social groups, closeness, and willingness to share</article-title>
          <source>Proceedings of the 13th International Conference on Ubiquitous Computing</source>
          <year>2011</year>
          <conf-name>UbiComp '11</conf-name>
          <conf-date>September 17-21, 2011</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <fpage>197</fpage>
          <lpage>206</lpage>
          <pub-id pub-id-type="doi">10.1145/2030112.2030140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paek</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hove</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jeong</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Peer or expert? The persuasive impact of YouTube public service announcement producers</article-title>
          <source>Int J Advert</source>
          <year>2015</year>
          <month>01</month>
          <day>07</day>
          <volume>30</volume>
          <issue>1</issue>
          <fpage>161</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.2501/ija-30-1-161-188</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>WY</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Omori</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Health information seeking in the Web 2.0 age: trust in social media, uncertainty reduction, and self-disclosure</article-title>
          <source>Comput Human Behav</source>
          <year>2016</year>
          <month>03</month>
          <volume>56</volume>
          <fpage>289</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2015.11.055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kimberly</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Data mining Twitter for cancer, diabetes, and asthma insights</article-title>
          <source>Purdue University</source>
          <year>2016</year>
          <access-date>2018-01-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://docs.lib.purdue.edu/dissertations/AAI10170604/">https://docs.lib.purdue.edu/dissertations/AAI10170604/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heaivilin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gerbert</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbs</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Public health surveillance of dental pain via Twitter</article-title>
          <source>J Dent Res</source>
          <year>2011</year>
          <month>09</month>
          <volume>90</volume>
          <issue>9</issue>
          <fpage>1047</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21768306"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0022034511415273</pub-id>
          <pub-id pub-id-type="medline">21768306</pub-id>
          <pub-id pub-id-type="pii">0022034511415273</pub-id>
          <pub-id pub-id-type="pmcid">PMC3169887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Mueller</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Snider</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Haire-Joshu</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Local health department use of Twitter to disseminate diabetes information, United States</article-title>
          <source>Prev Chronic Dis</source>
          <year>2013</year>
          <month>05</month>
          <day>02</day>
          <volume>10</volume>
          <fpage>E70</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/pcd/issues/2013/12_0215.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.5888/pcd10.120215</pub-id>
          <pub-id pub-id-type="medline">23639765</pub-id>
          <pub-id pub-id-type="pii">E70</pub-id>
          <pub-id pub-id-type="pmcid">PMC3652718</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hemsley</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Palmer</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Two studies on Twitter networks and tweet content in relation to Amyotrophic Lateral Sclerosis (ALS): conversation, information, and ‘Diary of a Daily Life’</article-title>
          <source>Digital Health Innovation for Consumers, Clinicians, Connectivity and Community: Selected Papers from the 24th Australian National Health Informatics Conference</source>
          <year>2016</year>
          <conf-name>HIC '16</conf-name>
          <conf-date>July 25-27, 2016</conf-date>
          <conf-loc>Melbourne, Australia</conf-loc>
          <publisher-loc>Amsterdam, The Netherlands</publisher-loc>
          <publisher-name>IOS Press</publisher-name>
          <fpage>41</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.3233/978-1-61499-666-8-41</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adrover</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bodnar</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Telenti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Salathé</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Identifying adverse effects of HIV drug treatment and associated sentiments using Twitter</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2015</year>
          <month>07</month>
          <day>27</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>e7</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2015/2/e7/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.4488</pub-id>
          <pub-id pub-id-type="medline">27227141</pub-id>
          <pub-id pub-id-type="pii">v1i2e7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4869211</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sioula-Georgoulea</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Approaching Twitter sociologically: a case study of the public humiliation of HIV-positive women</article-title>
          <source>Επιθεώρηση Κοινωνικών Ερευνών</source>
          <year>2015</year>
          <month>11</month>
          <day>25</day>
          <volume>144</volume>
          <fpage>103</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.12681/grsr.8625</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Odlum</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>HIV/AIDS and the millennium development goals: a public sentiment analysis of world AIDS day Twitter chat</article-title>
          <source>Int J AIDS Res</source>
          <year>2016</year>
          <month>11</month>
          <day>14</day>
          <volume>3</volume>
          <issue>9</issue>
          <fpage>134</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scidoc.org/articlepdfs/IJHR/IJHR-2379-1586-03-901.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.19070/2379-1586-1600026</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>RW</given-names>
            </name>
          </person-group>
          <article-title>Seeking and sharing health information online: comparing search engines and social media</article-title>
          <source>Proceedings of the SIGCHI Conference on Human Factors in Computing Systems</source>
          <year>2014</year>
          <conf-name>CHI '14</conf-name>
          <conf-date>April 26-May 1, 2014</conf-date>
          <conf-loc>Toronto, Canada</conf-loc>
          <fpage>1365</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.1145/2556288.2557214</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arakawa</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kameda</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aizawa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Suzuki</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Adding Twitter-specific features to stylistic features for classifying tweets by user type and number of retweets</article-title>
          <source>J Assn Inf Sci Tec</source>
          <year>2014</year>
          <month>01</month>
          <day>22</day>
          <volume>65</volume>
          <issue>7</issue>
          <fpage>1416</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1002/asi.23126</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holmberg</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bowman</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Haustein</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Astrophysicists' conversational connections on Twitter</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <month>8</month>
          <day>25</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>e106086</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0106086"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0106086</pub-id>
          <pub-id pub-id-type="medline">25153196</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-22823</pub-id>
          <pub-id pub-id-type="pmcid">PMC4143334</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennacchiotti</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Popescu</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Democrats, republicans and starbucks afficionados: user classification in Twitter</article-title>
          <source>Proceedings of the 17th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2011</year>
          <conf-name>KDD '11</conf-name>
          <conf-date>August 21-24, 2011</conf-date>
          <conf-loc>San Diego, CA, USA</conf-loc>
          <fpage>430</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1145/2020408.2020477</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennacchiotti</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Popescu</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>A machine learning approach to Twitter user classification</article-title>
          <source>Proc Int AAAI Conf Web Soc Media</source>
          <year>2011</year>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>281</fpage>
          <lpage>8</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yarowsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shreevats</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Classifying latent user attributes in Twitter</article-title>
          <source>Proceedings of the 2nd International Workshop on Search and Mining User-Generated Contents</source>
          <year>2010</year>
          <conf-name>SMUC '10</conf-name>
          <conf-date>October 30, 2010</conf-date>
          <conf-loc>Toronto, Canada</conf-loc>
          <fpage>37</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1145/1871985.1871993</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Diakopoulos</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Naaman</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Unfolding the event landscape on Twitter: classification and exploration of user categories</article-title>
          <source>Proceedings of the ACM 2012 conference on Computer Supported Cooperative Work</source>
          <year>2012</year>
          <conf-name>CSCW '12</conf-name>
          <conf-date>February 11-15, 2012</conf-date>
          <conf-loc>Seattle, WA, USA</conf-loc>
          <fpage>241</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1145/2145204.2145242</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holmberg</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Eriksson-Backa</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ek</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Tweeting about diabetes and diets – content and conversational connections</article-title>
          <source>Proceedings of the 5th International Conference on Well-Being in the Information Society</source>
          <year>2014</year>
          <conf-name>WIS '14</conf-name>
          <conf-date>August 18-20, 2014</conf-date>
          <conf-loc>Turku, Finland</conf-loc>
          <fpage>45</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-10211-5_5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bergsma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van Durme</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yarowsky</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Broadly improving user classification via communication-based name and location clustering on Twitter</article-title>
          <source>Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2013</year>
          <conf-name>HLT-NAACL '13</conf-name>
          <conf-date>June 9-14, 2013</conf-date>
          <conf-loc>Atlanta, GA, USA</conf-loc>
          <fpage>1010</fpage>
          <lpage>9</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Mart</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Moreland-Russell</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Caburnay</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>Diabetes topics associated with engagement on Twitter</article-title>
          <source>Prev Chronic Dis</source>
          <year>2015</year>
          <month>05</month>
          <day>07</day>
          <volume>12</volume>
          <fpage>E62</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/pcd/issues/2015/14_0402.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.5888/pcd12.140402</pub-id>
          <pub-id pub-id-type="medline">25950569</pub-id>
          <pub-id pub-id-type="pii">E62</pub-id>
          <pub-id pub-id-type="pmcid">PMC4436046</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Silverman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rowe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rowe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tick</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Testa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dodds</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Alabbas</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Borum</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Who is saying what about Inflammatory Bowel Disease on Twitter?</article-title>
          <source>GW Annual Research Days 2018</source>
          <year>2018</year>
          <conf-name>GW Research '18</conf-name>
          <conf-date>April 10-11, 2018</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>176</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hsrc.himmelfarb.gwu.edu/gw_research_days/2018/SMHS/62/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rowe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rowe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Silverman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Borum</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>P024 Crohn’s disease messaging on Twitter: who’s talking?</article-title>
          <source>Gastroenterology</source>
          <year>2018</year>
          <month>01</month>
          <day>1</day>
          <volume>154</volume>
          <issue>1</issue>
          <fpage>S13</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1053/j.gastro.2017.11.059</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roccetti</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Casari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marfia</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Inside chronic autoimmune disease communities: a social networks perspective to Crohn's patient behavior and medical information</article-title>
          <source>Proceedings of the 2015 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining</source>
          <year>2015</year>
          <conf-name>ASONAM '15</conf-name>
          <conf-date>August 25-28, 2015</conf-date>
          <conf-loc>Paris, France</conf-loc>
          <fpage>1089</fpage>
          <lpage>96</lpage>
          <pub-id pub-id-type="doi">10.1145/2808797.2808813</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Neill</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shandro</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Poullis</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Patient perspectives on social-media-delivered telemedicine for inflammatory bowel disease</article-title>
          <source>Future Healthc J</source>
          <year>2020</year>
          <month>10</month>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>241</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33094237"/>
          </comment>
          <pub-id pub-id-type="doi">10.7861/fhj.2020-0094</pub-id>
          <pub-id pub-id-type="medline">33094237</pub-id>
          <pub-id pub-id-type="pii">futurehealth</pub-id>
          <pub-id pub-id-type="pmcid">PMC7571747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frohlich</surname>
              <given-names>DO</given-names>
            </name>
            <name name-style="western">
              <surname>Zmyslinski-Seelig</surname>
              <given-names>AN</given-names>
            </name>
          </person-group>
          <article-title>How Uncover Ostomy challenges ostomy stigma, and encourages others to do the same</article-title>
          <source>New Media Soc</source>
          <year>2014</year>
          <month>07</month>
          <day>09</day>
          <volume>18</volume>
          <issue>2</issue>
          <fpage>220</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1177/1461444814541943</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pérez-Pérez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pérez-Rodríguez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Fdez-Riverola</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lourenço</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter to understand the human bowel disease community: exploratory analysis of key topics</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>08</month>
          <day>15</day>
          <volume>21</volume>
          <issue>8</issue>
          <fpage>e12610</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/8/e12610/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12610</pub-id>
          <pub-id pub-id-type="medline">31411142</pub-id>
          <pub-id pub-id-type="pii">v21i8e12610</pub-id>
          <pub-id pub-id-type="pmcid">PMC6711036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Margolis</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Maddali</surname>
              <given-names>HT</given-names>
            </name>
            <name name-style="western">
              <surname>Gloor</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Comparing online community structure of patients of chronic diseases</article-title>
          <source>Int J Organ Des Eng</source>
          <year>2016</year>
          <volume>4</volume>
          <issue>1/2</issue>
          <fpage>113</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1504/ijode.2016.10001025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Spiegel</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>van Oijen</surname>
              <given-names>MG</given-names>
            </name>
          </person-group>
          <article-title>Tu1068 Twitter offers insight into Health Related Quality of Life (HRQoL) in ulcerative colitis</article-title>
          <source>Gastroenterology</source>
          <year>2013</year>
          <month>05</month>
          <volume>144</volume>
          <issue>5</issue>
          <fpage>S-751</fpage>
          <pub-id pub-id-type="doi">10.1016/s0016-5085(13)62785-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dietterich</surname>
              <given-names>TG</given-names>
            </name>
            <name name-style="western">
              <surname>Lathrop</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Lozano-Pérez</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Solving the multiple instance problem with axis-parallel rectangles</article-title>
          <source>Artif Intell</source>
          <year>1997</year>
          <month>01</month>
          <volume>89</volume>
          <issue>1-2</issue>
          <fpage>31</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1016/s0004-3702(96)00034-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Statistical learning in multiple instance problems</article-title>
          <source>The University of Waikato</source>
          <year>2003</year>
          <month>6</month>
          <access-date>2018-07-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cs.waikato.ac.nz/~ml/publications/2003/xinxu_thesis.pdf">https://www.cs.waikato.ac.nz/~ml/publications/2003/xinxu_thesis.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>MI</given-names>
            </name>
          </person-group>
          <article-title>Latent dirichlet allocation</article-title>
          <source>J Mach Learn Res</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Foulds</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Frank</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A review of multi-instance learning assumptions</article-title>
          <source>Knowl Eng Rev</source>
          <year>2010</year>
          <month>03</month>
          <day>01</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1017/s026988890999035x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A comparison of multi-instance learning algorithms</article-title>
          <source>The University of Waikato</source>
          <year>2006</year>
          <month>2</month>
          <access-date>2018-07-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://researchcommons.waikato.ac.nz/bitstream/handle/10289/2453/thesis.pdf?sequence=1&#38;isAllowed=y">https://researchcommons.waikato.ac.nz/bitstream/handle/10289/2453/thesis.pdf?sequence=1&#38;isAllowed=y</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schulte</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Routley</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Aggregating predictions vs. aggregating features for relational classification</article-title>
          <source>Proceedings of the 2014 IEEE Symposium on Computational Intelligence and Data Mining</source>
          <year>2014</year>
          <conf-name>CIDM '14</conf-name>
          <conf-date>December 9-12, 2014</conf-date>
          <conf-loc>Orlando, FL, USA</conf-loc>
          <fpage>121</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1109/cidm.2014.7008657</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>É</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>30</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="web">
          <article-title>Composition of foods integrated dataset (CoFID)</article-title>
          <source>Public Health England</source>
          <year>2015</year>
          <month>3</month>
          <day>25</day>
          <access-date>2019-09-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gov.uk/government/publications/composition-of-foods-integrated-dataset-cofid#:~:text=The%20'Composition%20of%20Foods%20Integrated,recipes%20within%20the%20pork%20section">https://www.gov.uk/government/publications/composition-of-foods-integrated-dataset-cofid#:~:text=The%20'Composition%20of%20Foods%20Integrated,recipes%20within%20the%20pork%20section</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <article-title>FoodData Central</article-title>
          <source>U.S. Department of Agriculture, Agricultural Research Service</source>
          <year>2019</year>
          <access-date>2019-09-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fdc.nal.usda.gov/">https://fdc.nal.usda.gov/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="web">
          <article-title>Natural Language Understanding</article-title>
          <source>IBM Cloud API Docs</source>
          <year>2020</year>
          <access-date>2021-07-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cloud.ibm.com/apidocs/natural-language-understanding">https://cloud.ibm.com/apidocs/natural-language-understanding</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bath</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Demartini</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Woodfield</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter as a data source: an overview of ethical, legal, and methodological challenges</article-title>
          <source>The Ethics of Online Research (Advances in Research Ethics and Integrity, Volume 2)</source>
          <year>2017</year>
          <publisher-loc>Bingley, UK</publisher-loc>
          <publisher-name>Emerald Group Publishing</publisher-name>
          <fpage>79</fpage>
          <lpage>107</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fiesler</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Proferes</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>“Participant” perceptions of Twitter research ethics</article-title>
          <source>Soc Media Soc</source>
          <year>2018</year>
          <month>03</month>
          <day>10</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>205630511876336</fpage>
          <pub-id pub-id-type="doi">10.1177/2056305118763366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Burnap</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sloan</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Towards an ethical framework for publishing Twitter data in social research: taking into account users' views, online context and algorithmic estimation</article-title>
          <source>Sociology</source>
          <year>2017</year>
          <month>12</month>
          <volume>51</volume>
          <issue>6</issue>
          <fpage>1149</fpage>
          <lpage>68</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/0038038517708140?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0038038517708140</pub-id>
          <pub-id pub-id-type="medline">29276313</pub-id>
          <pub-id pub-id-type="pii">10.1177_0038038517708140</pub-id>
          <pub-id pub-id-type="pmcid">PMC5718335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ducharme</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vincent</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jauvin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A neural probabilistic language model</article-title>
          <source>J Mach Learn Res</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>1137</fpage>
          <lpage>55</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Collobert</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A unified architecture for natural language processing: deep neural networks with multitask learning</article-title>
          <source>Proceedings of the 25th International Conference on Machine Learning</source>
          <year>2008</year>
          <conf-name>ICML '08</conf-name>
          <conf-date>July 5-9, 2008</conf-date>
          <conf-loc>Helsinki, Finland</conf-loc>
          <fpage>160</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1145/1390156.1390177</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
