<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v21i6e12876</article-id>
      <article-id pub-id-type="pmid">31199327</article-id>
      <article-id pub-id-type="doi">10.2196/12876</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Mining of Textual Health Information from Reddit: Analysis of Chronic Diseases With Extracted Entities and Their Relations</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sarker</surname>
            <given-names>Abeed</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kulanthaivel</surname>
            <given-names>Anand</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Waring</surname>
            <given-names>Molly</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Van Den Broek-Altenburg</surname>
            <given-names>Eline</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="author" id="contrib1" equal-contrib="yes">
          <name name-style="western">
            <surname>Foufi</surname>
            <given-names>Vasiliki</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8625-0734</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib2" equal-contrib="yes">
          <name name-style="western">
            <surname>Timakum</surname>
            <given-names>Tatsawan</given-names>
          </name>
          <degrees>BA, MA</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9877-0323</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib3" equal-contrib="yes">
          <name name-style="western">
            <surname>Gaudet-Blavignac</surname>
            <given-names>Christophe</given-names>
          </name>
          <degrees>BSc CS, MMed</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6527-5898</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib4">
          <name name-style="western">
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
          <degrees>MD, MPH, FACMI</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-2681-8076</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib5" corresp="yes">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Min</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Department of Library and Information Science</institution>
            <institution>Yonsei University</institution>
            <addr-line>50 Yonsei-ro, Seodaemun-gu</addr-line>
            <addr-line>Seoul, 120-749</addr-line>
            <country>Republic of Korea</country>
            <phone>82 22123 2405</phone>
            <fax>82 2393 8348</fax>
            <email>min.song@yonsei.ac.kr</email>
          </address>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-3255-1600</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
      <label>1</label>
      <institution>Division of Medical Information Sciences</institution>
      <institution>University Hospitals of Geneva</institution>  
      <addr-line>Geneva</addr-line>
      <country>Switzerland</country></aff>
      <aff id="aff2">
      <label>2</label>
      <institution>Faculty of Medicine</institution>
      <institution>University of Geneva</institution>  
      <addr-line>Geneva</addr-line>
      <country>Switzerland</country></aff>
      <aff id="aff3">
      <label>3</label>
      <institution>Department of Library and Information Science</institution>
      <institution>Yonsei University</institution>  
      <addr-line>Seoul</addr-line>
      <country>Republic of Korea</country></aff>
      <author-notes>
        <corresp>Corresponding Author: Min Song 
        <email>min.song@yonsei.ac.kr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2019</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>13</day>
        <month>6</month>
        <year>2019</year>
      </pub-date>
      <volume>21</volume>
      <issue>6</issue>
      <elocation-id>e12876</elocation-id>
      <!--history from ojs - api-xml-->
      <history>
        <date date-type="received">
          <day>22</day>
          <month>11</month>
          <year>2018</year>
        </date>
        <date date-type="rev-request">
          <day>21</day>
          <month>12</month>
          <year>2018</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>5</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>21</day>
          <month>5</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Vasiliki Foufi, Tatsawan Timakum, Christophe Gaudet-Blavignac, Christian Lovis, Min Song. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 13.06.2019.</copyright-statement>
      <copyright-year>2019</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2019/6/e12876/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Social media platforms constitute a rich data source for natural language processing tasks such as named entity recognition, relation extraction, and sentiment analysis. In particular, social media platforms about health provide a different insight into patients’ experiences with diseases and treatment than that found in the scientific literature.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This paper aimed to report a study of entities related to chronic diseases and their relations in user-generated text posts. The major focus of our research is the study of biomedical entities found in health social media platforms and their relations and the way people suffering from chronic diseases express themselves.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We collected a corpus of 17,624 text posts from disease-specific subreddits of the social news and discussion website Reddit. For entity and relation extraction from this corpus, we employed the PKDE4J tool developed by Song et al (2015). PKDE4J is a text mining system that integrates dictionary-based entity extraction and rule-based relation extraction in a highly flexible and extensible framework.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Using PKDE4J, we extracted 2 types of entities and relations: biomedical entities and relations and subject-predicate-object entity relations. In total, 82,138 entities and 30,341 relation pairs were extracted from the Reddit dataset. The most frequently mentioned entities were those related to oncological disease (2884 occurrences of cancer) and asthma (2180 occurrences). The relation pair anatomy-disease was the most frequent (5550 occurrences), the most frequent entities in this pair being cancer and lymph. The manual validation of the extracted entities showed a very good performance of the system at the entity extraction task (3682/5151, 71.48% of extracted entities were correctly labeled).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study showed that people are eager to share their personal experience with chronic diseases on social media platforms despite possible privacy and security issues. The results reported in this paper are promising and demonstrate the need for more in-depth studies on the way patients with chronic diseases express themselves on social media platforms.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>social media</kwd>
        <kwd>chronic disease</kwd>
        <kwd>data mining</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>People are often concerned about their health status and a range of medical issues, especially when it comes to complex or chronic diseases that can take a long time to treat or monitor. Patients often desire easy access to information about diseases and symptoms to understand their condition and to facilitate self-management of diseases without total reliance upon interaction with a physician [<xref ref-type="bibr" rid="ref1">1</xref>]. Patients with chronic diseases in particular use social media to seek and provide social, emotional, and practical support [<xref ref-type="bibr" rid="ref2">2</xref>]. Therefore, social media information can influence patients’ decisions to manage their chronic condition [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>Social media platforms may support patients in their search for medical products or provide suggestions to promote healthy behavior and can improve health education as they allow people to write about their experiences with diseases, drugs, symptoms, and treatments. In recent years, social media platforms have grown quickly, with the public, patients, and health professionals sharing their experiences, looking for information, and interacting with others.</p>
        <p>Currently, more than 74% of internet users connect to social media, and 42% of the internet users take advantage of social media for health information. Moreover, 32% of social media users in the United States share their health care experiences and their families’ stories of struggle, and 29% search for health information via social media platforms to observe other patients’ experiences with their diseases [<xref ref-type="bibr" rid="ref3">3</xref>]. Furthermore, 51% of those who live with a chronic disease have used the internet for information about health topics such as details of a specific disease, medical procedures, drugs, medical devices, or health insurance [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>With its growing number of users, social media has become a powerful tool that can promote information sharing about health care, provide feedback from users, and foster support systems [<xref ref-type="bibr" rid="ref5">5</xref>]. In addition, the existence of social media platforms enables researchers to learn and discover the health experiences and feelings of patients and potentially discover new knowledge in health science. For example, user conversation content from health-related online forums, such as blogs, Twitter, and Facebook, has already been analyzed to find clusters of breast cancer symptoms [<xref ref-type="bibr" rid="ref6">6</xref>], examine smoking [<xref ref-type="bibr" rid="ref7">7</xref>], and understand the user discourse and describe social media interactions about obesity prevention [<xref ref-type="bibr" rid="ref8">8</xref>]. In particular, Reddit has been used as a data source for similar studies [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref12">12</xref>].</p>
        <p>The interactions between individuals on social media and the information they share constitute an important new source of data that can be used, on one hand, to understand the impact of drugs, diseases, and medical treatments on patients outside controlled clinical settings and, on the other hand, to comprehend health-related behavior.</p>
        <p>Discovering public knowledge in social media text constitutes a challenge for researchers and health care providers. To achieve this goal, various text mining approaches, such as topic modeling, information extraction, and visualization, exist.</p>
      </sec>
      <sec>
        <title>Biomedical Entity and Relation Extraction</title>
        <p>In the era of biomedical text mining, bioentities and their relations have arisen as a challenge to discover new knowledge. To mine the huge amounts of unstructured data, automatic information extraction tools have been conceived and developed based on several approaches. There are multiple systems developed for the identification and analysis of relations between diseases, drugs, and genes, such as Extraction of Drugs, Genes and Relations, a natural language system that extracts information about drugs and genes relevant to cancer from the biomedical literature [<xref ref-type="bibr" rid="ref13">13</xref>]. Extraction of drug-disease treatment pairs from the published literature was also carried out [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. To extract health social media information, adverse drug reactions and drug indications from a Spanish health forum were examined [<xref ref-type="bibr" rid="ref16">16</xref>] using MeaningCloud [<xref ref-type="bibr" rid="ref17">17</xref>], a multilingual text analysis engine based on a distant-supervision method to detect relations between drugs and side effects and used them to classify the relation instances.</p>
        <p>PKDE4J2.0 is a system that extracts bioentities and their relations with the aim to discover biomedical scientific knowledge. It is based on a dictionary to automatically tag bioentities according to their types and a set of predefined rules used for relation extraction. PKDE4J2.0 can be applied for knowledge search, knowledge network construction, and knowledge inference [<xref ref-type="bibr" rid="ref18">18</xref>]. PKDE4J1.1 was used to investigate drug-disease interactions in article abstracts from PubMed Central for making drug-symptom-disease triples [<xref ref-type="bibr" rid="ref19">19</xref>]. This tool was also applied in biomedical literature to extract biomedical verbs to present a relation type between 2 entities [<xref ref-type="bibr" rid="ref20">20</xref>] and on full-text papers to extract biological entities from diseases and genes and construct a knowledge network [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
      </sec>
      <sec>
        <title>Health Information Extraction From Social Media Platforms</title>
        <p>A large number of patients, caregivers, and health professionals use social media platforms to discuss mental health issues. These platforms also constitute an important data source for researchers. Machine learning and statistical methods were used to discriminate online messages between depression and control communities using mood, psycholinguistic processes, and content topics extracted from the posts generated by members of these communities [<xref ref-type="bibr" rid="ref22">22</xref>]. Users are interested in searching for treatment-related information, communicating with physicians to share their feelings about treatment effectiveness and side effects, discussing questions in health communities, and gaining knowledge about their conditions [<xref ref-type="bibr" rid="ref23">23</xref>]. User-generated content from these platforms contains valuable information [<xref ref-type="bibr" rid="ref24">24</xref>]. Their posts reflect what users think and feel about their medical experiences and often attract the attention of other patients, caregivers, and doctors.</p>
        <p>Lu et al [<xref ref-type="bibr" rid="ref25">25</xref>] mined data from online health communities and used text clustering integrating medical domain–specific knowledge to investigate patient needs and interests. Their results show that compared with existing methods, the addition of medical domain–specific features into their feature sets achieved significantly better clustering than was achieved without the addition of those features. Moreover, there were significant differences in hot topics on different kinds of disease discussion platforms. Health-related posts on social media were analyzed to investigate the polarity of opinions online, performing sentiment analysis [<xref ref-type="bibr" rid="ref26">26</xref>]. Medical terms, including those related to conditions, symptoms, treatments, effectiveness, and side effects, were extracted to generate a virtual document addressing each question raised by members of the community. Then latent Dirichlet allocation (LDA) was modified by adding a weighting scheme known as conditional LDA to cluster virtual documents with similar distributions of medical terms into a conditional topic (C-topic). Finally, the clustered C-topics were analyzed according to sentiment polarities and physiological and psychological sentiments. Identification of topics of patients' discussions on (1) Facebook about breast cancer and (2) cancerdusein.org was performed [<xref ref-type="bibr" rid="ref27">27</xref>]. These topics were assigned to functional and symptomatic dimensions by applying LDA topic modeling and identified relations between the topics and the questionnaires.</p>
        <p>Among others, Denecke [<xref ref-type="bibr" rid="ref1">1</xref>] reported that “user-generated content on the web has become a new source of useful information to be added to the conventional methods of collecting clinical data.”</p>
        <p>In terms of biomedical information extraction, previous studies relied on formal research and individual case studies to identify biomedical information. These approaches include observations of changes in patients [<xref ref-type="bibr" rid="ref28">28</xref>], meta-analysis of data from relevant databases [<xref ref-type="bibr" rid="ref29">29</xref>], and surveys of cancer patients [<xref ref-type="bibr" rid="ref30">30</xref>]. However, the scientific literature is generally limited to subscribers, and electronic medical records are not publicly available for reasons of patient privacy [<xref ref-type="bibr" rid="ref31">31</xref>]. Moreover, these sources do not provide a complete understanding of how patients suffering from a chronic disease feel and how they express these feelings.</p>
        <p>Using data from conversations between patients on social media platforms provides valuable information for researchers, physicians, and health care providers. This data source is different from, and complementary to, that obtained from conventional experimental methods.</p>
      </sec>
      <sec>
        <title>Research Objectives</title>
        <p>Therefore, a social media platform (Reddit) was chosen as the data source for this research that aimed to answer the following questions:</p>
        <list list-type="order">
          <list-item>
            <p>Which biomedical entities are prominent in the health social media platforms?</p>
          </list-item>
          <list-item>
            <p>What types of entities are related in the corpus?</p>
          </list-item>
          <list-item>
            <p>How do people express themselves about chronic diseases on social media platforms?</p>
          </list-item>
        </list>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection</title>
        <p>The data used for this research were extracted from disease-specific subreddits of the social news and discussion website Reddit [<xref ref-type="bibr" rid="ref32">32</xref>]. Forums such as Reddit tend to contrast sharply with similar offline groups; for instance, people are likely to discuss problems that they do not feel comfortable discussing face to face [<xref ref-type="bibr" rid="ref33">33</xref>]. As of 2013, Reddit’s official statistics included 56 billion page views, 731 million unique visitors, 40,855,032 posts, and 404,603,286 comments [<xref ref-type="bibr" rid="ref34">34</xref>]. In particular, the subreddit about cancer numbers 22,429 subscribers and 75 posts per day [<xref ref-type="bibr" rid="ref35">35</xref>]. These numbers demonstrate the external validity of Reddit. Another reason for having chosen Reddit as a data source is that the language of text posts is more structured than in other social media platforms such as Twitter.</p>
        <p>Reddit’s core functionality is the sharing of text-based posts with others who may or may not be members of the site. The subforum function allows the creation of designated spaces for users to congregate and interact with each other over a shared interest. Those subforums are called <italic>subreddits</italic>. A finite set of 19 subreddits related to chronic diseases was empirically selected for analysis. The choice of the specific subreddits was based on medical expertise and on the impact of these diseases on the quality of everyday life of patients.</p>
        <p>As the main goal was the detection of relations between entities and of the way people suffering from chronic diseases express themselves in social media and not the study of characteristics of specific chronic diseases, the posts from the 19 subreddits were merged in a single dataset.</p>
        <p>All of these subreddits host public content. In this research, no population study was performed. The study focuses on the expression of feelings and not on the identity of people sharing their experiences. From each post, only the title and the body (textual content) were extracted, without any additional information related to the authors.</p>
        <p>The study was submitted to the Swiss Ethical Committee, which issued a decision of nonconsideration provided that the collected data are not identifiable.</p>
      </sec>
      <sec>
        <title>Lexicosemantic Resources</title>
        <p>Lexicosemantic resources were constructed and incorporated into the tool. These resources included a list of stop words and biomedical dictionaries of diseases, drugs, anatomy, procedures, symptoms, side effects, and findings created from clinical health care terminologies such as the Systematized Nomenclature of Human and Veterinary Medicine - Clinical Terms [<xref ref-type="bibr" rid="ref36">36</xref>], the National Library of Medicine's controlled vocabulary thesaurus [<xref ref-type="bibr" rid="ref37">37</xref>], the Gene Ontology knowledgebase [<xref ref-type="bibr" rid="ref38">38</xref>], the Kyoto Encyclopedia of Genes and Genomes database [<xref ref-type="bibr" rid="ref39">39</xref>], and the DrugBank database [<xref ref-type="bibr" rid="ref40">40</xref>]. Semantic relations properties were attributed to 4558 biomedical verbs extracted from the Unified Medical Language System [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
        <p>The dictionaries were enriched with lemmas extracted from the corpus; for instance, <italic>chemo</italic>, <italic>AML</italic> (acute myeloid leukemia), <italic>take care, support</italic>, and <italic>fight</italic>.</p>
      </sec>
      <sec>
        <title>Description of the Tool</title>
        <p>In this research, the PKDE4J version 2.0 tool [<xref ref-type="bibr" rid="ref42">42</xref>] was used. This text mining system consists of 2 modules: entity extraction and relation extraction.</p>
        <sec>
          <title>Entity Extraction Module</title>
          <p>This module integrates dictionary-based entity extraction and rule-based relation extraction into a highly flexible and extensible framework. The Stanford CoreNLP pipeline [<xref ref-type="bibr" rid="ref43">43</xref>] was modified to make it suitable for advanced dictionary-based entity extraction. The entity extraction module consists of 4 major submodules: preprocessing, dictionary loading, entity annotation, and postprocessing. PKDE4J can analyze entities and relations from both structured and unstructured text.</p>
        </sec>
        <sec>
          <title>Relation Extraction Module</title>
          <p>The relation extraction workflow identifies directed qualified relations starting from sentences from which 2 or more entities have been extracted by the entity extraction module. The relation extraction module takes a list of verbs and nominalization terms that are employed to identify relations of interest. After extracting entities from a sentence, further relation extraction algorithms are executed to construct rules for the extraction of relations of entities. A set of 20 dependency parsing–based rules is at the core of the relation extraction module and provides an ontologically enriched structure for sentences by annotating edges with dependency types. To extract relations, the system identifies a verb, which may be located between entities and contains relational characteristics; then, it checks the bioverb list to determine the relation between the entities (<xref ref-type="fig" rid="figure1">Figure 1</xref>) [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>The workflow of the PKDE4J text mining system.</p>
            </caption>
            <graphic xlink:href="jmir_v21i6e12876_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Visualization</title>
        <p>The Gephi platform [<xref ref-type="bibr" rid="ref45">45</xref>] was used to visualize the network of chronic diseases in the corpus. To build a graph, the <italic>k</italic>-shortest paths routing algorithm was applied. The graph visualization tool was then used to map the chronic disease entities. A PageRank of terms was computed to rank the important entities in the network; therefore, entities ranked highly by PageRank have the highest impact.</p>
      </sec>
      <sec>
        <title>Validation of Entity Extraction</title>
        <p>To evaluate the performance of the tool on entity extraction, 1000 posts randomly selected from the entire corpus were manually validated. The entities were evaluated as correct or incorrect based on the following specific guidelines.</p>
        <sec>
          <title>Findings and Symptoms</title>
          <p>This category refers to a phenomenon that is experienced by a person or described by a clinician and cannot be considered as a disease in the context, for example, “This news makes me feel anxiety.”</p>
        </sec>
        <sec>
          <title>Disease Names</title>
          <p>This category refers to an abnormal condition of a human, animal, or plant that causes discomfort or dysfunction [<xref ref-type="bibr" rid="ref46">46</xref>]. As also mentioned in the previous category, the context helps to distinguish between a disease and a symptom or finding. For example, in the sentence “After trying which dosage is good, my insomnia is thankfully gone again,” <italic>insomnia</italic> refers to a disease, whereas in the sentence “I have had symptoms of insomnia within the last months,” <italic>insomnia</italic> describes a symptom/finding.</p>
        </sec>
        <sec>
          <title>Side Effects</title>
          <p>This category includes a symptom/finding or a disease that is caused by a treatment in the context. For example:</p>
          <disp-quote>
            <p>Since beginning treatment have woken with bouts of nausea...</p>
          </disp-quote>
        </sec>
        <sec>
          <title>Procedure</title>
          <p>Procedure refers to any intervention carried out on someone that is related to physical, mental, or social health. For example:</p>
          <disp-quote>
            <p>...treatment which would include surgery and radiation/chemotherapy according to his oncologist</p>
          </disp-quote>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Collection</title>
        <p>A dataset of 17,624 text posts was semiautomatically collected using crawlers accessing public streams. <xref ref-type="table" rid="table1">Table 1</xref> shows the subreddits used for this research, the number of posts per subreddit, and the proportion of corpus representation of each subreddit:</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Sources used for the data collection.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="300"/>
            <col width="400"/>
            <thead>
              <tr valign="top">
                <td>Subreddit name</td>
                <td>Number of posts</td>
                <td>Proportion of posts from each subreddit in the corpus (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>r/cancer</td>
                <td>5210</td>
                <td>26.9</td>
              </tr>
              <tr valign="top">
                <td>r/MultipleSclerosis</td>
                <td>1902</td>
                <td>9.8</td>
              </tr>
              <tr valign="top">
                <td>r/rheumatoid</td>
                <td>1783</td>
                <td>9.2</td>
              </tr>
              <tr valign="top">
                <td>r/CrohnsDisease</td>
                <td>1722</td>
                <td>8.9</td>
              </tr>
              <tr valign="top">
                <td>r/Asthma</td>
                <td>1600</td>
                <td>8.3</td>
              </tr>
              <tr valign="top">
                <td>r/testicularcancer</td>
                <td>1384</td>
                <td>7.1</td>
              </tr>
              <tr valign="top">
                <td>r/Parkinsons</td>
                <td>1042</td>
                <td>5.4</td>
              </tr>
              <tr valign="top">
                <td>r/Hashimotos</td>
                <td>1022</td>
                <td>5.3</td>
              </tr>
              <tr valign="top">
                <td>r/Alzheimers</td>
                <td>927</td>
                <td>4.8</td>
              </tr>
              <tr valign="top">
                <td>r/breastcancer</td>
                <td>794</td>
                <td>4.1</td>
              </tr>
              <tr valign="top">
                <td>r/braincancer</td>
                <td>623</td>
                <td>3.2</td>
              </tr>
              <tr valign="top">
                <td>r/pancreaticcancer</td>
                <td>397</td>
                <td>2.1</td>
              </tr>
              <tr valign="top">
                <td>r/lymphoma</td>
                <td>387</td>
                <td>2.0</td>
              </tr>
              <tr valign="top">
                <td>r/leukemia</td>
                <td>223</td>
                <td>1.2</td>
              </tr>
              <tr valign="top">
                <td>r/kidney</td>
                <td>107</td>
                <td>0.6</td>
              </tr>
              <tr valign="top">
                <td>r/multiplemyeloma</td>
                <td>104</td>
                <td>0.5</td>
              </tr>
              <tr valign="top">
                <td>r/thyroidcancer</td>
                <td>63</td>
                <td>0.3</td>
              </tr>
              <tr valign="top">
                <td>r/lungcancer</td>
                <td>41</td>
                <td>0.2</td>
              </tr>
              <tr valign="top">
                <td>r/skincancer</td>
                <td>15</td>
                <td>0.1</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>19,346</td>
                <td>100</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>After sorting the data corpus, duplicate posts and those with no relevant meaning, such as advertising posts and posts containing only a hyperlink, were removed. The final corpus comprises 17,580 posts (2,137,115 tokens).</p>
      </sec>
      <sec>
        <title>Biomedical Entity and Relation Extraction</title>
        <p>The PKDE4J system performed named entity extraction and 2 types of relation extraction: relations between biomedical entities and between subjects, predicates, and objects on the sentence level. The system’s output is a corpus annotated with entities and information about their relation.</p>
        <p>The entities are either simple terms or complex structures referring to diseases, anatomy, procedures, findings, symptoms, side effects, or drugs. In total, PKDE4J extracted 82,138 entities from the Reddit dataset, as shown in <xref ref-type="table" rid="table2">Table 2</xref>. The entity names and entity types were allocated to the 7 categories of the biomedical dictionaries. The 10 most frequent entity names followed by the number of occurrences in the corpus are displayed in <xref ref-type="table" rid="table3">Table 3</xref>. It should be noted that the terms are given in the text in the form found in the corpus. Therefore, abbreviated terms have not been expanded.</p>
        <p>As displayed in <xref ref-type="table" rid="table2">Table 2</xref>, 29,669 disease entities were extracted representing 1341 unique diseases; 19,956 anatomy entities, of which 369 are distinct anatomical terms; 11,549 procedures of 296 different types; 8741 findings of 483 different types; 6256 symptom entities describing 65 symptoms; 5351 entities representing side effects of 321 different types; and 35 different drug names (616 in total). The most highly represented diseases are oncological (<italic>cancer</italic>, <italic>breast cancer</italic>, <italic>tumor</italic>, <italic>leukemia</italic>, and <italic>lymphoma</italic>) or relate to asthma. The anatomy category contains a range of anatomical terms. Specifically, <italic>blood</italic> is the most frequent term. Other widely used anatomical terms are <italic>back</italic>, <italic>brain</italic>, <italic>hand</italic>, <italic>hair</italic>, <italic>breast</italic>, <italic>chest</italic>, <italic>heart</italic>, and <italic>neck</italic>. The procedures category comprises terms referring to chemical treatments (<italic>chemo</italic>), surgery, laboratory tests (<italic>blood test</italic>), social interventions (<italic>advice</italic> and <italic>listening</italic>), and others.</p>
        <p>The most frequent symptom mentioned in the corpus is <italic>pain</italic> (2648 occurrences). <italic>Fatigue</italic>, <italic>inflammation</italic>, <italic>nausea</italic>, and <italic>cough</italic> are some of the symptoms commonly reported by patients or relatives in the dataset. In the side-effect category, the most frequent entities are <italic>anxiety</italic>, <italic>stress</italic>, <italic>swelling</italic>, <italic>crying</italic>, and <italic>fear</italic> followed by <italic>disability</italic> and <italic>worry</italic>. The most commonly reported drug is <italic>prednisone</italic> followed by <italic>morphine</italic>, <italic>salbutamol</italic>, and <italic>tramadol</italic>.</p>
      </sec>
      <sec>
        <title>Validation of Entity Extraction</title>
        <p>Among the 5151 extracted entities, 3682 were correctly labeled by the system, whereas 1469 were attributed with incorrect labels. The performance of the system was 71.48%.</p>
        <p>Next, an error analysis was performed on the incorrectly labeled entities. Errors were classified into 3 categories:</p>
        <list list-type="order">
          <list-item>
            <p>Lexical errors (488/1469, 33.21%): the term <italic>breast</italic> is an anatomical term, but in the post, the compound term <italic>breast cancer</italic> appears. However, the system failed to extract the entire entity.</p>
          </list-item>
          <list-item>
            <p>Dictionary errors (550/1469, 37.44%): for example, <italic>air</italic> and <italic>aspergillus</italic> were falsely listed as an anatomical term and as a drug name, respectively.</p>
          </list-item>
          <list-item>
            <p>Ambiguous concepts (431/1469, 29.33%): the term <italic>bleeding</italic> could be either a disease name or a symptom.</p>
          </list-item>
        </list>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Entity extraction results.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Entity types</td>
                <td>Diseases</td>
                <td>Anatomy</td>
                <td>Procedures</td>
                <td>Findings</td>
                <td>Symptoms</td>
                <td>Side effects</td>
                <td>Drugs</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Extracted entities, n</td>
                <td>29,669</td>
                <td>19,956</td>
                <td>11,549</td>
                <td>8741</td>
                <td>6256</td>
                <td>5351</td>
                <td>616</td>
              </tr>
              <tr valign="top">
                <td>Entity names, n</td>
                <td>1341</td>
                <td>369</td>
                <td>296</td>
                <td>483</td>
                <td>65</td>
                <td>321</td>
                <td>35</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Ten most frequent entities by type.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="120"/>
            <col width="160"/>
            <col width="140"/>
            <col width="160"/>
            <col width="140"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Diseases</td>
                <td>Anatomy</td>
                <td>Procedures</td>
                <td>Findings</td>
                <td>Symptoms</td>
                <td>Side effects</td>
                <td>Drugs</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Cancer (2884)</td>
                <td>Blood (1542)</td>
                <td>Chemo (1914)</td>
                <td>Related (521)</td>
                <td>Pain (2648)</td>
                <td>Anxiety (683)</td>
                <td>Prednisone (417)</td>
              </tr>
              <tr valign="top">
                <td>Asthma (2180)</td>
                <td>Back (1034)</td>
                <td>Treatment (1909)</td>
                <td>Lump (359)</td>
                <td>Fatigue (639)</td>
                <td>Stress (373)</td>
                <td>Morphine (33)</td>
              </tr>
              <tr valign="top">
                <td>All (2163)</td>
                <td>Brain (962)</td>
                <td>Surgery (1909)</td>
                <td>Suffering (333)</td>
                <td>Inflammation (472)</td>
                <td>Swelling (348)</td>
                <td>Salbutamol (33)</td>
              </tr>
              <tr valign="top">
                <td>Breast cancer (804)</td>
                <td>Hand (656)</td>
                <td>Advice (774)</td>
                <td>Confused (305)</td>
                <td>Scared (273)</td>
                <td>Crying (245)</td>
                <td>Tramadol (26)</td>
              </tr>
              <tr valign="top">
                <td>Can (745)</td>
                <td>Head (627)</td>
                <td>Radiation (627)</td>
                <td>Problem (304)</td>
                <td>Nausea (244)</td>
                <td>Mass (220)</td>
                <td>MRSA<sup>a</sup> (14)</td>
              </tr>
              <tr valign="top">
                <td>Tumor (631)</td>
                <td>Hair (549)</td>
                <td>Biopsy (366)</td>
                <td>Attack (277)</td>
                <td>Hurt (205)</td>
                <td>Fear (215)</td>
                <td>Aspirin (11)</td>
              </tr>
              <tr valign="top">
                <td>Disease (563)</td>
                <td>Breast (535)</td>
                <td>Chemotherapy (338)</td>
                <td>Energy (270)</td>
                <td>Sore (202)</td>
                <td>Disability (164)</td>
                <td>Omeprazole (9)</td>
              </tr>
              <tr valign="top">
                <td>TSH<sup>b</sup> (506)</td>
                <td>Chest (511)</td>
                <td>Blood test (268)</td>
                <td>Terrified (266)</td>
                <td>Numb (195)</td>
                <td>Worry (142)</td>
                <td>Seretide (6)</td>
              </tr>
              <tr valign="top">
                <td>Depression (414)</td>
                <td>Heart (503)</td>
                <td>Infusion (199)</td>
                <td>Tired (251)</td>
                <td>Cutting (104)</td>
                <td>Fall (129)</td>
                <td>Citrus (5)</td>
              </tr>
              <tr valign="top">
                <td>Lymphoma (348)</td>
                <td>Neck (459)</td>
                <td>Listening (158)</td>
                <td>Follow up (249)</td>
                <td>Tingling (101)</td>
                <td>Discomfort (120)</td>
                <td>Echinacea (5)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>MRSA: methicillin-resistant <italic>Staphylococcus aureus</italic>.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>TSH: thyroid stimulating hormone.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Relation Extraction</title>
        <p>The system extracted pairs of entities (entity 1 and entity 2) that were found in the same sentence and linked by a relation, and then assigned a type to each entity. For instance, entity 1, <italic>Borderline</italic> (disease), co-occurs with entity 2, <italic>High blood pressure</italic> (symptom), in the same sentence. In total, 30,341 relation pairs were extracted; examples are shown in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
        <p>Of the 30,341 relation pairs, the most frequent entity relation pairs and their number of co-occurrences are shown in <xref ref-type="table" rid="table5">Table 5</xref>.</p>
        <p>The relations between anatomy and disease entity types are the most frequent (5550 pairs). The pair disease-disease co-occurs 4668 times, and the pair anatomy-anatomy appears 3595 times.</p>
        <p><xref ref-type="table" rid="table6">Table 6</xref> contains the 5 most frequent entities per relation pair.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Example of entity relation extraction.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="160"/>
            <col width="120"/>
            <col width="360"/>
            <thead>
              <tr valign="top">
                <td>Analysis result</td>
                <td>Entity 1</td>
                <td>Entity 1 type</td>
                <td>Entity 2</td>
                <td>Entity 2 type</td>
                <td>Sentence from post</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Output 1</td>
                <td>Borderline</td>
                <td>disease</td>
                <td>High blood pressure</td>
                <td>symptom</td>
                <td>Prior to that, I was fat mid forties male borderline high HDL, high blood pressure but ZERO issues with thyroid or immune issues.</td>
              </tr>
              <tr valign="top">
                <td>Output 2</td>
                <td>Optic neuritis</td>
                <td>disease</td>
                <td>Multiple sclerosis</td>
                <td>symptom</td>
                <td>She said that I have something called optic neuritis and that about half the time people get it and they don’t know why but the other half its because someone has multiple sclerosis.</td>
              </tr>
              <tr valign="top">
                <td>Output 3</td>
                <td>Syndrome</td>
                <td>disease</td>
                <td>Nerve</td>
                <td>anatomy</td>
                <td>I had bilateral optic neuritis significantly worse in my left eye in Late August September and I was also simultaneously diagnosed with Browns Syndrome which they’re not 100% convinced on as it may have been misdiagnosed 6th nerve palsy.</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Most frequent entity relation pairs and their number of co-occurrences.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="800"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Relation pair</td>
                <td>Co-occurrences, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>anatomy-anatomy</td>
                <td>3595</td>
              </tr>
              <tr valign="top">
                <td>anatomy-disease, disease-anatomy</td>
                <td>5550</td>
              </tr>
              <tr valign="top">
                <td>anatomy-procedure, procedure-anatomy</td>
                <td>1730</td>
              </tr>
              <tr valign="top">
                <td>anatomy-symptom, symptom-anatomy</td>
                <td>1227</td>
              </tr>
              <tr valign="top">
                <td>anatomy-side effect, side effect-anatomy</td>
                <td>1081</td>
              </tr>
              <tr valign="top">
                <td>disease-disease</td>
                <td>4668</td>
              </tr>
              <tr valign="top">
                <td>disease-procedure, procedure-disease</td>
                <td>2540</td>
              </tr>
              <tr valign="top">
                <td>disease-finding, finding-disease</td>
                <td>2128</td>
              </tr>
              <tr valign="top">
                <td>disease-side effect, side effect-disease</td>
                <td>1502</td>
              </tr>
              <tr valign="top">
                <td>disease-symptom, symptom-disease</td>
                <td>1080</td>
              </tr>
              <tr valign="top">
                <td>finding-finding</td>
                <td>303</td>
              </tr>
              <tr valign="top">
                <td>finding-anatomy, anatomy-finding</td>
                <td>1362</td>
              </tr>
              <tr valign="top">
                <td>procedure-procedure</td>
                <td>1023</td>
              </tr>
              <tr valign="top">
                <td>procedure-finding, finding-procedure</td>
                <td>430</td>
              </tr>
              <tr valign="top">
                <td>side effect-side effect</td>
                <td>256</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>The 5 most frequent entities per relation pair.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="50"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="170"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Rank</td>
                <td colspan="6">Pair of entity 1 and entity 2</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Anatomy|Anatomy</td>
                <td>Disease|Disease</td>
                <td>Disease|Side effect</td>
                <td>Disease|Anatomy</td>
                <td>Disease|Procedure</td>
                <td>Disease|Finding</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Back|Hair</td>
                <td>Cancer|ALL</td>
                <td>Depression|Anxiety</td>
                <td>Cancer|Lymph</td>
                <td>Cancer|Surgery</td>
                <td>Asthma|Attack</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Neck|Lymph</td>
                <td>Asthma|Allergy</td>
                <td>Asthma|Anxiety</td>
                <td>Tumor|Blood</td>
                <td>Tumor|Surgery</td>
                <td>Cancer|Suffering</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Brain|Lungs</td>
                <td>Tumor|Seminoma</td>
                <td>Cancer|Fear</td>
                <td>Asthma|Lungs</td>
                <td>Breast cancer|Treatment</td>
                <td>ALL|Follow up</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Lungs|Lymph</td>
                <td>ALL|Asthma</td>
                <td>ALL|Swelling</td>
                <td>ALL|Blood</td>
                <td>Asthma|Advice</td>
                <td>Depression|Suffering</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Head|Hair</td>
                <td>Depression|Fatigue</td>
                <td>Aches|Anxiety</td>
                <td>TSH|Blood</td>
                <td>ALL|Treatment</td>
                <td>Exercise|Muscle tension</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>The most frequent entities in the pair disease-anatomy are <italic>Cancer|Lymph</italic>, in the pair disease-disease are <italic>Cancer|ALL</italic>, and in the pair anatomy-anatomy are <italic>Back|Hair</italic>.</p>
        <p>To summarize, once the most frequent entities were extracted, the results were processed according to the shortest path between each entity pair to produce the graph shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>. Among 2561 nodes and 13,405 edges, this entity network shows that <italic>pain</italic> highly co-occurs with other entities in the network (biggest node, weighted at 0.022461), followed by <italic>cancer</italic> (PageRank score at 0.018057) and <italic>surgery</italic> (PageRank score at 0.015443).</p>
        <p>The node <italic>pain</italic> has connections with other nodes, including <italic>fatigue</italic>, <italic>inflammation</italic>, <italic>stomach</italic>, <italic>joints</italic>, <italic>cancer</italic>, and <italic>chemo</italic>. The node <italic>cancer</italic> is strongly linked to <italic>chemo</italic>, <italic>surgery</italic>, <italic>treatment</italic>, <italic>ALL (acute lymphoblastic leukemia)</italic>, <italic>anxiety</italic>, and <italic>blood</italic>. The nodes of <italic>surgery</italic>, <italic>chemo</italic>, and <italic>treatment</italic> are linked to diseases and body parts. Finally, the entity nodes relating to mental health, such as <italic>anxiety</italic> and <italic>depression</italic>, also appear in the network and associate with other bioentity types. <xref ref-type="table" rid="table7">Table 7</xref> shows the most frequent entities and the corresponding PageRank scores:</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Biomedical entity network.</p>
          </caption>
          <graphic xlink:href="jmir_v21i6e12876_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>The most frequent entities and the corresponding PageRank scores.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="800"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Label</td>
                <td>PageRank score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Pain</td>
                <td>0.022461</td>
              </tr>
              <tr valign="top">
                <td>Cancer</td>
                <td>0.018057</td>
              </tr>
              <tr valign="top">
                <td>Surgery</td>
                <td>0.015443</td>
              </tr>
              <tr valign="top">
                <td>Chemo</td>
                <td>0.014954</td>
              </tr>
              <tr valign="top">
                <td>Treatment</td>
                <td>0.014275</td>
              </tr>
              <tr valign="top">
                <td>Blood</td>
                <td>0.013841</td>
              </tr>
              <tr valign="top">
                <td>Asthma</td>
                <td>0.012554</td>
              </tr>
              <tr valign="top">
                <td>All</td>
                <td>0.010931</td>
              </tr>
              <tr valign="top">
                <td>Brain</td>
                <td>0.010311</td>
              </tr>
              <tr valign="top">
                <td>Fatigue</td>
                <td>0.009626</td>
              </tr>
              <tr valign="top">
                <td>Back</td>
                <td>0.008574</td>
              </tr>
              <tr valign="top">
                <td>Radiation</td>
                <td>0.008317</td>
              </tr>
              <tr valign="top">
                <td>Tumor</td>
                <td>0.007814</td>
              </tr>
              <tr valign="top">
                <td>Neck</td>
                <td>0.006968</td>
              </tr>
              <tr valign="top">
                <td>Hand</td>
                <td>0.006618</td>
              </tr>
              <tr valign="top">
                <td>Lymph</td>
                <td>0.006442</td>
              </tr>
              <tr valign="top">
                <td>Normal</td>
                <td>0.006176</td>
              </tr>
              <tr valign="top">
                <td>Anxiety</td>
                <td>0.006108</td>
              </tr>
              <tr valign="top">
                <td>Head</td>
                <td>0.005933</td>
              </tr>
              <tr valign="top">
                <td>Hair</td>
                <td>0.005762</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Subject-Predicate-Object Entity Relation Extraction</title>
        <p>The system extracted 69,263 subject or object entities. The top 10 entities are shown in <xref ref-type="table" rid="table8">Table 8</xref>. In total, 41,068 relations were extracted and the results were classified into 2 types of subjects: subject pronoun (<italic>I, you, he, she, it, we</italic>, and <italic>they</italic>) and subject noun (<italic>treatment</italic>). The relation pairs are divided into 19,645 pairs of subject pronoun-object entities and 21,423 pairs of subject noun-object entities.</p>
        <p><xref ref-type="table" rid="table9">Table 9</xref> shows 2 examples of the subject-predicate-object relation extraction: the subject (for example <italic>I</italic>, <italic>he</italic>, <italic>anyone</italic>, <italic>it</italic>, and <italic>asthma</italic>), the predicate (verbs such as <italic>have</italic>, <italic>get</italic>, and <italic>increase</italic>), the object (terms such as <italic>eczema</italic>, <italic>allergies</italic>, <italic>childhood asthma</italic>, <italic>my cough</italic>, and <italic>allergy shots</italic>), and the sentence of the corresponding post.</p>
        <sec>
          <title>Subject Pronoun-Predicate-Object</title>
          <p>The subject pronoun-predicate-object relation extraction demonstrates that the most frequent subject pronoun is <italic>I</italic> (11,691 times, including <italic>I’ve</italic>, <italic>I’m</italic>, and <italic>I’d</italic>). Some examples are shown in <xref ref-type="table" rid="table10">Table 10</xref>.</p>
        </sec>
        <sec>
          <title>Subject Noun-Predicate-Object</title>
          <p><xref ref-type="table" rid="table11">Table 11</xref> shows some examples of subject noun-predicate-object relation extraction. Among the 21,423 relation pairs, the most frequent subject nouns are diseases such as <italic>asthma</italic> (272 occurrences, including phrases such as <italic>asthma anxiety</italic>, <italic>asthma attacks</italic>, <italic>my asthma</italic>, <italic>my asthma and allergies</italic>, and <italic>my asthma flare</italic>) and <italic>cancer</italic> (226 occurrences).</p>
          <table-wrap position="float" id="table8">
            <label>Table 8</label>
            <caption>
              <p>The top 10 occurrences of subjects and objects.</p>
            </caption>
            <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="800"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td>Entity</td>
                  <td>Count, n</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>I</td>
                  <td>10,314</td>
                </tr>
                <tr valign="top">
                  <td>It</td>
                  <td>2832</td>
                </tr>
                <tr valign="top">
                  <td>Pain</td>
                  <td>2341</td>
                </tr>
                <tr valign="top">
                  <td>She</td>
                  <td>1501</td>
                </tr>
                <tr valign="top">
                  <td>He</td>
                  <td>1323</td>
                </tr>
                <tr valign="top">
                  <td>Cancer</td>
                  <td>1060</td>
                </tr>
                <tr valign="top">
                  <td>Asthma</td>
                  <td>1015</td>
                </tr>
                <tr valign="top">
                  <td>They</td>
                  <td>900</td>
                </tr>
                <tr valign="top">
                  <td>Me</td>
                  <td>719</td>
                </tr>
                <tr valign="top">
                  <td>You</td>
                  <td>597</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <table-wrap position="float" id="table9">
            <label>Table 9</label>
            <caption>
              <p>Examples of subject-predicate-object relation extraction results.</p>
            </caption>
            <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="150"/>
              <col width="150"/>
              <col width="150"/>
              <col width="150"/>
              <col width="400"/>
              <thead>
                <tr valign="top">
                  <td>Analysis result</td>
                  <td>Subject entity</td>
                  <td>Predicate</td>
                  <td>Object entity</td>
                  <td>Sentence</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Output 1</td>
                  <td>I</td>
                  <td>Had</td>
                  <td>the skin allergy test</td>
                  <td>I had the skin allergy test done and it came back positive for almost every kind of pollen and mold, etc.</td>
                </tr>
                <tr valign="top">
                  <td>Output 2</td>
                  <td>my blue inhaler</td>
                  <td>Increases</td>
                  <td>my asthma</td>
                  <td>I noticed consistently my blue inhaler increases my asthma about 30% after using it and I believe was the cause of a recent very bad asthma attack.</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <table-wrap position="float" id="table10">
            <label>Table 10</label>
            <caption>
              <p>Example of subject pronoun-predicate-object relation extraction.</p>
            </caption>
            <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="200"/>
              <col width="200"/>
              <col width="200"/>
              <col width="400"/>
              <thead>
                <tr valign="top">
                  <td>Subject</td>
                  <td>Predicate</td>
                  <td>Object</td>
                  <td>Sentence</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>I</td>
                  <td>take</td>
                  <td>the typical seretide</td>
                  <td>I take the typical seretide morning and night and ventolin when l need it.</td>
                </tr>
                <tr valign="top">
                  <td>I</td>
                  <td>have</td>
                  <td>a deep and painful cough</td>
                  <td>I have a deep and painful cough that's been leaving me with back, chest, and side pains.</td>
                </tr>
                <tr valign="top">
                  <td>You</td>
                  <td>ever take</td>
                  <td>allergy shots</td>
                  <td>Hey, to you asthmatics who have allergy induced asthma, did you ever take allergy shots.</td>
                </tr>
                <tr valign="top">
                  <td>He</td>
                  <td>was given</td>
                  <td>Prednisone</td>
                  <td>He was given prednisone for that as well.</td>
                </tr>
                <tr valign="top">
                  <td>She</td>
                  <td>has</td>
                  <td>Asthma</td>
                  <td>My sister, who lives with me, started complaining to me about it, saying that she doesn’t want me doing that when her daughter is home because she has asthma and it smokes up the house.</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <table-wrap position="float" id="table11">
            <label>Table 11</label>
            <caption>
              <p>Example of subject noun-predicate-object relation extraction.</p>
            </caption>
            <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="200"/>
              <col width="200"/>
              <col width="200"/>
              <col width="400"/>
              <thead>
                <tr valign="top">
                  <td>Subject</td>
                  <td>Predicate</td>
                  <td>Object</td>
                  <td>Sentence</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Asthma</td>
                  <td>is becoming way more than just</td>
                  <td>a physical issue</td>
                  <td>Asthma is becoming way more than just a physical issue, it's taking a toll on my mental health.</td>
                </tr>
                <tr valign="top">
                  <td>Cancer</td>
                  <td>had spread to</td>
                  <td>her bones</td>
                  <td>The doctor told her that cancer had spread to her bones and that she'll have to have injections for it?</td>
                </tr>
                <tr valign="top">
                  <td>Fever</td>
                  <td>is indeed mentioned as</td>
                  <td>a side effect</td>
                  <td>Ive also been using Modulair Montelukast Sodium, and fever is indeed mentioned as a side effect on my leaflet.</td>
                </tr>
                <tr valign="top">
                  <td>Hives</td>
                  <td>are from</td>
                  <td>allergies</td>
                  <td>They’re trying to tell me they are panic attacks but as far as I know hives are from allergies and they sometimes happen during my asthma attacks.</td>
                </tr>
                <tr valign="top">
                  <td>The depression</td>
                  <td>is occurring simultaneously with</td>
                  <td>the increased asthma symptoms</td>
                  <td>I noticed the depression is occurring simultaneously with the increased asthma symptoms and was wondering if there is a correlation and if anyone else has experienced this.</td>
                </tr>
                <tr valign="top">
                  <td>My milk allergy</td>
                  <td>was causing</td>
                  <td>my asthma</td>
                  <td>My milk allergy was causing my asthma.</td>
                </tr>
                <tr valign="top">
                  <td>My second course of prednisone</td>
                  <td>has been great for stopping</td>
                  <td>the wheezing</td>
                  <td>And I'm on my second course of prednisone which has been great for stopping the wheezing - even the rescue inhaler didn't help before.</td>
                </tr>
                <tr valign="top">
                  <td>The doctor</td>
                  <td>ruled out</td>
                  <td>pneumonia</td>
                  <td>Anyway, the doctor ruled out pneumonia and said I had caught a cold on the plane and it had triggered an asthma exacerbation.</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p><xref ref-type="fig" rid="figure3">Figure 3</xref> demonstrates the most used subject and object entities. The results show that the most frequent subject is the pronoun <italic>I</italic> (PageRank score: 0.100619). The pronouns <italic>It</italic>, <italic>She</italic>, <italic>He</italic>, <italic>They</italic>, and <italic>You</italic> are also frequently used. Diseases, body parts, treatments, and symptoms are widely used as the subjects and/or objects as well as the possessive pronoun <italic>my</italic> (<italic>my mother</italic>, <italic>my eyes</italic>, and <italic>my dad</italic>). <xref ref-type="table" rid="table12">Table 12</xref> presents the most frequent subject and object entities and the corresponding PageRank scores.</p>
        </sec>
      </sec>
      <sec>
        <title>Social Media Language</title>
        <p>Expressions that constitute specific terms developed on social media, such as <italic>pm</italic> (private message) and <italic>FWIW</italic> (For What it's Worth), were identified in the corpus. “A common feature of microblog texts is the use of symbols in posts, such as the love-heart dingbat symbol” [<xref ref-type="bibr" rid="ref47">47</xref>]. Emoticons such as <inline-graphic xlink:href="jmir_v21i6e12876_fig4.png" mimetype="image" xlink:type="simple"/>, and text-based emoticons such as <italic>LOL</italic> (laughing out loud), :), :-(, =), and :( are also frequent.</p>
        <p>In addition, the corpus contains informal phrases such as “Rooting for you!” and “I’m still chugging along”; adjectives such as <italic>loopy</italic>, <italic>drippy</italic>, <italic>dicey</italic>, and <italic>zonked</italic>; and verbs such as <italic>puke</italic> that substitute for their equivalents in standard language.</p>
        <p>Entities found in the Reddit corpus present numerous morphosyntactic variants. For example, the term <italic>chemotherapy</italic> was rarely found, but the short form <italic>chemo</italic> was frequently used. The disease name <italic>Hodgkin’s Lymphoma</italic> is found as <italic>Hodgkin Lymphoma</italic>, <italic>Hodgkins Lymphoma</italic>, <italic>Hodgkin disease</italic>, and <italic>HL</italic>. Similarly, the entity name <italic>Mixed Cellularity Classical Hodgkin Lymphoma</italic> is found as <italic>Mixed Cellularity Hodgkin Lymphoma</italic>, <italic>Mixed Cellularity Hodgkins Lymphoma</italic>, and <italic>MCCHL.</italic> Moreover, there are many abbreviated forms of entity names, such as <italic>ALL</italic>, <italic>AML</italic>, <italic>BRCA2</italic> (breast cancer type 2), <italic>CLL</italic> (chronic lymphocytic leukemia), <italic>CML</italic> (chronic myeloid leukemia), <italic>COPD</italic> (chronic obstructive pulmonary disease), <italic>DCIS</italic> (ductal carcinoma in situ), and <italic>GERD</italic> (gastroesophageal reflux disease). When these forms were included in the disease dictionary, the system managed to detect them. Some examples are presented in <xref ref-type="table" rid="table13">Table 13</xref>.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Subject and object entity network.</p>
          </caption>
          <graphic xlink:href="jmir_v21i6e12876_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table12">
          <label>Table 12</label>
          <caption>
            <p>The most frequent entities and the corresponding PageRank scores.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="800"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Label</td>
                <td>PageRank score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>I</td>
                <td>0.100619</td>
              </tr>
              <tr valign="top">
                <td>It</td>
                <td>0.027234</td>
              </tr>
              <tr valign="top">
                <td>Pain</td>
                <td>0.020651</td>
              </tr>
              <tr valign="top">
                <td>She</td>
                <td>0.014206</td>
              </tr>
              <tr valign="top">
                <td>He</td>
                <td>0.012875</td>
              </tr>
              <tr valign="top">
                <td>Cancer</td>
                <td>0.00939</td>
              </tr>
              <tr valign="top">
                <td>Asthma</td>
                <td>0.009324</td>
              </tr>
              <tr valign="top">
                <td>They</td>
                <td>0.008927</td>
              </tr>
              <tr valign="top">
                <td>Me</td>
                <td>0.006934</td>
              </tr>
              <tr valign="top">
                <td>You</td>
                <td>0.005852</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table13">
          <label>Table 13</label>
          <caption>
            <p>Examples of disease entities in their abbreviated form.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Abbreviated form of entity name</td>
                <td>Full entity name</td>
                <td>Example from the corpus</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ALL</td>
                <td>Acute lymphoblastic leukemia</td>
                <td>My boyfriend was diagnosed with ALL 2 years ago and stayed in remission after a few rounds of chemo.</td>
              </tr>
              <tr valign="top">
                <td>AML</td>
                <td>Acute myeloid leukemia</td>
                <td>Diagnosed with AML this past Sept.</td>
              </tr>
              <tr valign="top">
                <td>BRCA2</td>
                <td>Breast cancer type 2</td>
                <td>Her sister, my aunt, was diagnosed with breast cancer at 27 and was dead by 33 she tested positive for BRCA2 as well.</td>
              </tr>
              <tr valign="top">
                <td>CLL</td>
                <td>Chronic lymphocytic leukemia</td>
                <td>My CLL is more of SLL, which is the same thing but presented in my lymph nodes.</td>
              </tr>
              <tr valign="top">
                <td>CML</td>
                <td>Chronic myeloid leukemia</td>
                <td>25 years old, diagnosed with CML when I was 15.</td>
              </tr>
              <tr valign="top">
                <td>COPD</td>
                <td>Chronic obstructive pulmonary disease</td>
                <td>Hes been smoking for over 40 years, has COPD and isnt in the greatest health generally overweight, inactive, etc.</td>
              </tr>
              <tr valign="top">
                <td>DCIS</td>
                <td>Ductal carcinoma in situ</td>
                <td>We found out last week she has both DCIS and Invasive DCIS.</td>
              </tr>
              <tr valign="top">
                <td>GERD</td>
                <td>Gastroesophageal reflux disease</td>
                <td>Sleep apnea can also worsen GERD, and GERD is known to worsen asthma.</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this paper, we collected user-generated chronic disease–related data from Reddit and extracted information pertinent to biomedical entities and their relations to examine the characteristics of the language used by users in this social media platform. Initially, the corpus was created by semiautomatically extracting posts from specific subforums of Reddit. Next, lexicosemantic resources from various sources were created. To perform the information extraction tasks—entity extraction and relation extraction—the PKDE4J text mining system was used. The system extracted 82,138 biomedical entities and 30,341 relations. These results indicate that the corpus contains a large amount of information.</p>
      </sec>
      <sec>
        <title>Performance of the Tool</title>
        <p>As described in the Results section, the system achieved a high performance in the named entity extraction task and the attribution of entity types (3682/5151, 71.48% of extracted entities were correctly labeled). As already mentioned, the language used in Reddit is structured enough, with a satisfactory number of full sentences so the system managed to extract entities and their relations. The error analysis showed that the system failed to detect a number of entities or falsely attributed the entity type because of lexical errors, dictionary errors, or ambiguous concepts.</p>
      </sec>
      <sec>
        <title>Entity Extraction</title>
        <p>Entities prominent in the corpus refer to diseases, anatomical terms, procedures, findings, and symptoms. While interpreting the entities extracted from the corpus, it must be taken into account that the corpus was constructed by selecting subreddits created to share information about specific diseases. Therefore, it is expected that entities related to these diseases are the most likely to be represented. For instance, parts of the body affected by specific cancers, such as <italic>breast</italic> or <italic>blood</italic>, occur very frequently.</p>
        <p>The most frequent disease entities in this corpus are oncologic diseases such as <italic>cancer</italic>, <italic>ALL</italic>, and <italic>breast cancer</italic>. Frequently mentioned nononcologic diseases are <italic>asthma</italic>, <italic>depression</italic>, and the generic entity <italic>disease</italic>. The entity <italic>thyroid stimulating hormone</italic> (TSH) is frequently mentioned, but it should be further classified in findings.</p>
        <p>The most frequent anatomy entity is <italic>blood</italic>. This is explained primarily because of the numerous posts speaking about <italic>leukemia</italic> and <italic>lymphoma</italic>. Moreover, people often report the results of blood tests, a situation that increases the number of entities identified.</p>
        <p>Terms tagged as <italic>procedures</italic> extracted from the corpus are mainly linked to oncologic diseases. About 2000 occurrences of <italic>chemo</italic> and <italic>chemotherapy</italic> were extracted. <italic>Chemotherapy</italic> is a significant procedure with numerous side effects. The fact that patients mention it at a high frequency shows that it is a treatment with a strong impact on quality of life and raises a lot of questions and worries for the patients involved. Social intervention procedures such as <italic>listening</italic> and <italic>advice</italic> are also frequent (see <xref ref-type="table" rid="table3">Table 3</xref>). This observation indicates that apart from technical information about treatment and surgeries, people also speak about the support they got during their disease or search for it in the community.</p>
        <p>In medicine, symptoms can be difficult to differentiate from findings. This difference often resides in the context of the phenomenon. In the corpus, entities belonging to those categories as well as the side-effects category can be analyzed together to gain a better understanding of the results. Most frequent entities from these categories are closely related to the patient experiences and feelings. Concepts related to the feeling of fear are the most frequently present in this merged category: 7 out of 30 entities express fear or feelings related to fear using the words <italic>anxiety</italic>, <italic>stress</italic>, <italic>confused</italic>, <italic>scared</italic>, <italic>terrified</italic>, <italic>fear</italic>, and <italic>worry</italic>. This is coherent with studies on cancer survivors that state the fear of cancer recurrence as almost universal among this population [<xref ref-type="bibr" rid="ref48">48</xref>]. It appears that people with chronic conditions use social media to share feelings they have experienced. The chronic diseases selected in this corpus frequently imply severe impact on lifestyle and decrease life expectancy. Therefore, it is logical that <italic>fear</italic> and <italic>anxiety</italic> are prominent entities in the corpus.</p>
        <p>Health-related quality of life in chronically ill patients has been an established field in medical research for many years. Questionnaires such as the European Organisation for Research and Treatment of Cancer Quality of Life-C15-Palliative [<xref ref-type="bibr" rid="ref49">49</xref>] or, more recently, the Functional Assessment of Cancer Therapy-General 7 [<xref ref-type="bibr" rid="ref50">50</xref>] and Patient-Reported Outcomes Measurement Information System [<xref ref-type="bibr" rid="ref51">51</xref>] are used routinely to assess it in those populations. When looking at the top concerns raised by patients suffering from cancer [<xref ref-type="bibr" rid="ref50">50</xref>], it is interesting to note that they are in line with the top entities extracted from the corpus. More specifically, the most frequent nondisease entity extracted, <italic>pain</italic>, is a key item in multiple quality-of-life assessment questionnaires. This shows that the experiences that the patients share on social media platforms are coherent with what has been proven to have an impact on their life.</p>
        <p>Overall, entities extracted from the corpus are coherent with similar studies conducted on health-related social media [<xref ref-type="bibr" rid="ref27">27</xref>] and with validated evaluation of the quality of life of patients suffering from chronic diseases.</p>
      </sec>
      <sec>
        <title>Relation Extraction</title>
        <p>The relation extraction performed on this corpus shows that the most highly represented relation type identified is the <italic>disease-anatomy</italic> relation (5550 occurrences). The pairs most frequently representing a disease and its localization are <italic>cancer-lymph</italic>, <italic>asthma-lungs</italic>, and <italic>tumor-blood</italic>. This suggests that people using social media platforms to speak about their chronic diseases are willing to explain which disease they suffer from as well as the location of the disease. This propensity is probably linked to the fact that such subreddits are used to share life experiences and to find people with similar backgrounds. This commonality can be reassuring and informative for a person suffering from the same disease. To find these people and knowledge, it is valuable to share the nature of the disease and its anatomical location.</p>
        <p>The second most frequent entity pair is <italic>disease-disease</italic> (4668 occurrences). Pairs of entities such as <italic>cancer-ALL</italic> (acute lymphoblastic leukemia) and <italic>asthma-allergy</italic> are frequent. This co-occurrence of diseases might be related to the fact that chronic diseases often lead to complications and to other diseases. For example, <italic>asthma-allergy</italic> was perceived from the sentence “have allergy induced asthma.”</p>
        <p>The third most frequent entity pair is <italic>anatomy-anatomy</italic> (3595 occurrences). When looking at specific occurrences of this pair, the pairs <italic>neck-lymph</italic>, <italic>brain-lungs</italic>, and <italic>lungs-lymph</italic> are frequent. Another entity pair, <italic>head-hair</italic>, is related to people speaking about the side effects of chemotherapy.</p>
        <p>Relations linking <italic>diseases</italic> to <italic>procedures</italic> are present at a high frequency in the corpus (2540 occurrences). When looking specifically in this category, it is clear that the most frequently identified disease in the corpus, <italic>cancer</italic>, is also most highly represented in those relations.</p>
        <p>The relations extracted from the corpus demonstrate that patients with chronic diseases are willing to share detailed information about their health condition in a structured manner, describing thoroughly the disease, its location, the symptoms it caused, and the effect of treatment.</p>
      </sec>
      <sec>
        <title>Subject-Predicate-Object Entity Relation Extraction</title>
        <p>The language patterns of subject-predicate-object relations demonstrate important characteristics of health social media language. As is apparent in the outputs, subject pronouns and object pronouns were frequently mentioned and were used mostly in the first-person singular form, such as <italic>I</italic>, <italic>me</italic>, and <italic>my</italic>. These patterns are related to the way individuals share personal or family experiences (“I-had-a bad cold or sinus infection,” “Allergens-explains-my severe asthma,” “It-is making-my heartburn,” and “Anyone-develop-eczema”) and feelings (“I have a history of testicular cancer in family so Im pretty scared bht im hoping its nothing”). Also, patients or relatives, after having described their problem, treatment, and possible effects, often ask for advice, as shown in the following sentence:</p>
        <disp-quote>
          <p>I noticed the depression is occurring simultaneously with the increased asthma symptoms and was wondering if there is a correlation and if anyone else has experienced this?</p>
        </disp-quote>
      </sec>
      <sec>
        <title>Social Media Language</title>
        <p>Data derived from clinical narratives and research papers differ significantly from social media content. The language and style used by the authors as well as the content are different. From a linguistic point of view, medical blogs usually consist of syntactically correct sentences but can contain verbless clauses or sentences without subjects [<xref ref-type="bibr" rid="ref52">52</xref>]. Abbreviations, enumerations, and citations of conversations, medical terms, and opinion-related words are used frequently in medical blog posts and websites. As stated in the study by Korkontzelos et al [<xref ref-type="bibr" rid="ref53">53</xref>], “in social media, users rarely use technical terms.” Moreover, emoticons are very often used to convey emotion or to give contextual information to correctly understand a message (such as irony or sarcasm). The corpus processed in this research confirmed these observations.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are 2 major limitations of the PKDE4J tool with regard to the objectives of this paper. First, PKDE4J was initially developed for the processing of well-structured biomedical texts and not for social media text. This issue has relatively little impact on this paper given that the entity extraction task is based on dictionaries. However, for tasks such as part-of-speech tagging and sentence parsing needed for the extraction of relations, the informality of social media text poses a challenge. Second, the lack of terms from the dictionaries as well as lexical and semantic ambiguities lowered the performance of the system. For instance, abbreviations and acronyms can have multiple interpretations, and this can lead to ambiguities. In the current version of the system, these types of ambiguities are not handled. Consequently, all occurrences of <italic>ALL</italic> found in the corpus were extracted, even those that do not refer to the disease <italic>Acute Lymphoblastic Leukemia</italic>. Also, the lexical unit <italic>back</italic> has sometimes been falsely recognized as a body part.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Data from social media platforms devoted to health can provide valuable information about the experiences of the patients involved. In this paper, we reported the application of an information extraction approach using the PKDE4J tool to detect, extract, and visualize chronic disease entities and relations and to identify characteristics of the social media language in a corpus collected from Reddit.</p>
        <p>In the Results section, we showed which disease entities are frequently mentioned and which are the most frequent relation pairs. Relation extraction demonstrated that the most frequent relation pair is the <italic>disease-anatomy</italic> pair and that the subject-object relation pattern in the social media language is the use of the first-person pronoun, given that people share personal experiences.</p>
        <p>Although data privacy and information sharing are becoming major concerns in research, and legal frameworks, such as the General Data Protection Regulation law, have begun to set boundaries for the storage and sharing of information generated by users, it is interesting that despite those concerns, users are willing to share private health information in open social networks.</p>
        <p>Further research should focus on the enrichment of dictionaries and adaptation of rules to common usages of social media language and the processing of emoticons for the sentiment analysis task. Finally, the identification of the type of semantic relations and the evaluation of the relation extraction results should be performed to assess the performance of the system in this task.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ALL</term>
          <def>
            <p>acute lymphoblastic leukemia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AML</term>
          <def>
            <p>acute myeloid leukemia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BRCA2</term>
          <def>
            <p>breast cancer type 2</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CLL</term>
          <def>
            <p>chronic lymphocytic leukemia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CML</term>
          <def>
            <p>chronic myeloid leukemia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">COPD</term>
          <def>
            <p>chronic obstructive pulmonary disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">C-topic</term>
          <def>
            <p>conditional topic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">DCIS</term>
          <def>
            <p>ductal carcinoma in situ</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">FWIW</term>
          <def>
            <p>For What it's Worth</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">GERD</term>
          <def>
            <p>gastroesophageal reflux disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">LOL</term>
          <def>
            <p>laughing out loud</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">MRSA</term>
          <def>
            <p>methicillin-resistant <italic>Staphylococcus aureus</italic></p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">TSH</term>
          <def>
            <p>thyroid stimulating hormone</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the Ministry of Education of the Republic of Korea and the National Research Foundation of Korea (NRF-2018S1A3A2075114) and the University of Geneva, Switzerland.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>VF, TT, and CGB contributed equally to the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>CL is editor-in-chief for JMIR Medical Informatics.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Denecke</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <source>Health Web Science: Social Media Data for Healthcare</source>  
        <year>2015</year>  
        <publisher-loc>New York</publisher-loc>
        <publisher-name>Springer International Publishing</publisher-name></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Patel</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Greysen</surname>
            <given-names>SR</given-names>
          </name>
          <name name-style="western">
            <surname>Chopra</surname>
            <given-names>V</given-names>
          </name>
        </person-group>
        <article-title>Social media use in chronic disease: a systematic review and novel taxonomy</article-title>
        <source>Am J Med</source>  
        <year>2015</year>  
        <month>12</month>  
        <volume>128</volume>  
        <issue>12</issue>  
        <fpage>1335</fpage>  
        <lpage>50</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.amjmed.2015.06.015</pub-id>
        <pub-id pub-id-type="medline">26159633</pub-id>
        <pub-id pub-id-type="pii">S0002-9343(15)00565-3</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
        <source>ReferralMD</source>  
        <year>2017</year>  
        <access-date>2019-06-03</access-date>
        <comment>30 Facts &amp; Stats on Social Media and Healthcare 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://getreferralmd.com/2017/01/30-facts-statistics-on-social-media-and-healthcare/">https://getreferralmd.com/2017/01/30-facts-statistics-on-social-media-and-healthcare/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78qwn2Dif"/></comment> </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
        <source>Pew Research Center</source>  
        <access-date>2019-06-03</access-date>
        <comment>Chronic Disease and the Internet 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewinternet.org/2010/03/24/chronic-disease-and-the-internet/">https://www.pewinternet.org/2010/03/24/chronic-disease-and-the-internet/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78qx81xnX"/></comment> </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Moorhead</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hazlett</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Harrison</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Carroll</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Irwin</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hoving</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>A new dimension of health care: systematic review of the uses, benefits, and limitations of social media for health communication</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <month>04</month>  
        <day>23</day>  
        <volume>15</volume>  
        <issue>4</issue>  
        <fpage>e85</fpage>  
        <pub-id pub-id-type="doi">10.2196/jmir.1933</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Marshall</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>CC</given-names>
          </name>
          <name name-style="western">
            <surname>Ping</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Avis</surname>
            <given-names>NE</given-names>
          </name>
          <name name-style="western">
            <surname>Ip</surname>
            <given-names>EH</given-names>
          </name>
        </person-group>
        <article-title>Symptom clusters in women with breast cancer: an analysis of data from social media and a research study</article-title>
        <source>Qual Life Res</source>  
        <year>2016</year>  
        <month>03</month>  
        <volume>25</volume>  
        <issue>3</issue>  
        <fpage>547</fpage>  
        <lpage>57</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26476836"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1007/s11136-015-1156-7</pub-id>
        <pub-id pub-id-type="medline">26476836</pub-id>
        <pub-id pub-id-type="pii">10.1007/s11136-015-1156-7</pub-id>
        <pub-id pub-id-type="pmcid">PMC5129624</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Myslín</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Zhu</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Chapman</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Conway</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Using Twitter to examine smoking behavior and perceptions of emerging tobacco products</article-title>
        <source>J Med Internet Res</source>  
        <year>2013</year>  
        <volume>15</volume>  
        <issue>8</issue>  
        <fpage>e174</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2013/8/e174/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.2534</pub-id>
        <pub-id pub-id-type="medline">23989137</pub-id>
        <pub-id pub-id-type="pii">v15i8e174</pub-id>
        <pub-id pub-id-type="pmcid">PMC3758063</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chou</surname>
            <given-names>WS</given-names>
          </name>
          <name name-style="western">
            <surname>Prestin</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Kunath</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Obesity in social media: a mixed methods analysis</article-title>
        <source>Transl Behav Med</source>  
        <year>2014</year>  
        <month>09</month>  
        <volume>4</volume>  
        <issue>3</issue>  
        <fpage>314</fpage>  
        <lpage>23</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25264470"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1007/s13142-014-0256-1</pub-id>
        <pub-id pub-id-type="medline">25264470</pub-id>
        <pub-id pub-id-type="pii">256</pub-id>
        <pub-id pub-id-type="pmcid">PMC4167901</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sharma</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Wigginton</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Meurk</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Ford</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Gartner</surname>
            <given-names>CE</given-names>
          </name>
        </person-group>
        <article-title>Motivations and limitations associated with vaping among people with mental illness: a qualitative analysis of Reddit discussions</article-title>
        <source>Int J Environ Res Public Health</source>  
        <year>2016</year>  
        <month>12</month>  
        <day>22</day>  
        <volume>14</volume>  
        <issue>1</issue>  
        <fpage>7</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.mdpi.com/resolver?pii=ijerph14010007"/>
        </comment>  
        <pub-id pub-id-type="doi">10.3390/ijerph14010007</pub-id>
        <pub-id pub-id-type="medline">28025516</pub-id>
        <pub-id pub-id-type="pii">ijerph14010007</pub-id>
        <pub-id pub-id-type="pmcid">PMC5295258</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Park</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Conway</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Tracking health related discussions on Reddit for public health applications</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2017</year>  
        <volume>2017</volume>  
        <fpage>1362</fpage>  
        <lpage>71</lpage>  
        <pub-id pub-id-type="medline">29854205</pub-id>
        <pub-id pub-id-type="pmcid">PMC5977623</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pandrekar</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Gopalkrishna</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Srivastava</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Saltz</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Saltz</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>Social media based analysis of opioid epidemic using Reddit</article-title>
        <source>AMIA Annu Symp Proc</source>  
        <year>2018</year>  
        <volume>2018</volume>  
        <fpage>867</fpage>  
        <lpage>76</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30815129"/>
        </comment>  
        <pub-id pub-id-type="medline">30815129</pub-id>
        <pub-id pub-id-type="pmcid">PMC6371364</pub-id></nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sumner</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Galik</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Mathieu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ward</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kiley</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Bartholow</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Dingwall</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Mork</surname>
            <given-names>P</given-names>
          </name>
        </person-group>
        <article-title>Temporal and geographic patterns of social media posts about an emerging suicide game</article-title>
        <source>J Adolesc Health</source>  
        <year>2019</year>  
        <month>02</month>  
        <day>25</day>  
        <fpage>1</fpage>  
        <lpage>7</lpage>  
        <comment>(forthcoming)</comment>  
        <pub-id pub-id-type="doi">10.1016/j.jadohealth.2018.12.025</pub-id>
        <pub-id pub-id-type="medline">30819581</pub-id>
        <pub-id pub-id-type="pii">S1054-139X(19)30018-7</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rindflesch</surname>
            <given-names>TC</given-names>
          </name>
          <name name-style="western">
            <surname>Tanabe</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Weinstein</surname>
            <given-names>JN</given-names>
          </name>
          <name name-style="western">
            <surname>Hunter</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>EDGAR: extraction of drugs, genes and relations from the biomedical literature</article-title>
        <source>Pac Symp Biocomput</source>  
        <year>2000</year>  
        <fpage>517</fpage>  
        <lpage>28</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://psb.stanford.edu/psb-online/proceedings/psb00/abstracts/p517.html"/>
        </comment>  
        <pub-id pub-id-type="medline">10902199</pub-id>
        <pub-id pub-id-type="pmcid">PMC2709525</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>Large-scale extraction of accurate drug-disease treatment pairs from biomedical literature for drug repurposing</article-title>
        <source>BMC Bioinformatics</source>  
        <year>2013</year>  
        <month>06</month>  
        <day>6</day>  
        <volume>14</volume>  
        <issue>1</issue>  
        <fpage>181</fpage>  
        <pub-id pub-id-type="doi">10.1186/1471-2105-14-181</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zhu</surname>
            <given-names>Yongjun</given-names>
          </name>
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Min</given-names>
          </name>
          <name name-style="western">
            <surname>Yan</surname>
            <given-names>Erjia</given-names>
          </name>
        </person-group>
        <article-title>Identifying liver cancer and its relations with diseases, drugs, and genes: a literature-based approach</article-title>
        <source>PLoS One</source>  
        <year>2016</year>  
        <month>05</month>  
        <day>19</day>  
        <volume>11</volume>  
        <issue>5</issue>  
        <fpage>e0156091</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0156091"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0156091</pub-id>
        <pub-id pub-id-type="medline">27195695</pub-id>
        <pub-id pub-id-type="pii">PONE-D-15-37279</pub-id>
        <pub-id pub-id-type="pmcid">PMC4873143</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Segura-Bedmar</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Martínez</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Revert</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Moreno-Schneider</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Exploring Spanish health social media for detecting drug effects</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2015</year>  
        <month>06</month>  
        <day>15</day>  
        <volume>15</volume>  
        <issue>2</issue>  
        <fpage>S6</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-15-S2-S6"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6947-15-S2-S6</pub-id>
        <pub-id pub-id-type="medline">26100267</pub-id>
        <pub-id pub-id-type="pii">1472-6947-15-S2-S6</pub-id>
        <pub-id pub-id-type="pmcid">PMC4474583</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
        <source>MeaningCloud</source>  
        <access-date>2019-06-03</access-date>
        <comment>Text Analytics – MeaningCloud text mining solutions 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.meaningcloud.com/">https://www.meaningcloud.com/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78r622IFx"/></comment> </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kang</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>YH</given-names>
          </name>
          <name name-style="western">
            <surname>Jeon</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Application of public knowledge discovery tool (PKDE4J) to represent biomedical scientific knowledge</article-title>
        <source>Front Res Metr Anal</source>  
        <year>2018</year>  
        <fpage>1</fpage>  
        <lpage>16</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.semanticscholar.org/paper/Application-of-Public-Knowledge-Discovery-Tool-to-Song-Kim/76b2cdb7fcfd4da408446cf31ca980c8f2cec02e"/>
        </comment>  
        <pub-id pub-id-type="doi">10.3389/frma.2018.00007</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kang</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>An</surname>
            <given-names>JY</given-names>
          </name>
        </person-group>
        <article-title>Investigating drug–disease interactions in drug–symptom–disease triples via citation relations</article-title>
        <source>J Assoc Inf Sci Technol</source>  
        <year>2018</year>  
        <month>07</month>  
        <day>30</day>  
        <fpage>1355</fpage>  
        <lpage>68</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/full/10.1002/asi.24060"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1002/asi.24060</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Beak</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Song</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Constructing linguistic verb source for relation extraction</article-title>
        <source>Proceedings of the 25th ACM International on Conference on Information and Knowledge Management</source>  
        <year>2016</year>  
        <conf-name>CIKM'16</conf-name>
        <conf-date>October 24-28, 2016</conf-date>
        <conf-loc>Indianapolis, Indiana, USA</conf-loc>
        <fpage>2511</fpage>  
        <lpage>2</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dtmbio.net/dtmbio2016/pdf/8.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Amplayo</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Song</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <source>ACL Member Portal</source>  
        <year>2016</year>  
        <access-date>2019-06-03</access-date>
        <comment>Building Content-driven Entity Networks for Scarce Scientific Literature using Content Information 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://aclweb.org/anthology/W16-5103">https://aclweb.org/anthology/W16-5103</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78r7Ga6MV"/></comment> </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Phung</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Dao</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Venkatesh</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Berk</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Affective and content analysis of online depression communities</article-title>
        <source>IEEE Trans Affective Comput</source>  
        <year>2014</year>  
        <month>07</month>  
        <day>1</day>  
        <volume>5</volume>  
        <issue>3</issue>  
        <fpage>217</fpage>  
        <lpage>26</lpage>  
        <pub-id pub-id-type="doi">10.1109/TAFFC.2014.2315623</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Monnier</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Laken</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Carter</surname>
            <given-names>CL</given-names>
          </name>
        </person-group>
        <article-title>Patient and caregiver interest in internet-based cancer services</article-title>
        <source>Cancer Pract</source>  
        <year>2002</year>  
        <month>11</month>  
        <volume>10</volume>  
        <issue>6</issue>  
        <fpage>305</fpage>  
        <lpage>10</lpage>  
        <pub-id pub-id-type="doi">10.1046/j.1523-5394.2002.106005.x</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>O'Neill</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Ziebland</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Valderas</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Lupiáñez-Villanueva</surname>
            <given-names>F</given-names>
          </name>
        </person-group>
        <article-title>User-generated online health content: a survey of internet users in the United Kingdom</article-title>
        <source>J Med Internet Res</source>  
        <year>2014</year>  
        <month>04</month>  
        <day>30</day>  
        <volume>16</volume>  
        <issue>4</issue>  
        <fpage>e118</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2014/4/e118/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.3187</pub-id>
        <pub-id pub-id-type="medline">24784798</pub-id>
        <pub-id pub-id-type="pii">v16i4e118</pub-id>
        <pub-id pub-id-type="pmcid">PMC4019776</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Deng</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Health-related hot topic detection in online communities using text clustering</article-title>
        <source>PLoS One</source>  
        <year>2013</year>  
        <month>02</month>  
        <volume>8</volume>  
        <issue>2</issue>  
        <fpage>e56221</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0056221"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0056221</pub-id>
        <pub-id pub-id-type="medline">23457530</pub-id>
        <pub-id pub-id-type="pii">PONE-D-12-27785</pub-id>
        <pub-id pub-id-type="pmcid">PMC3574139</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>AJ</given-names>
          </name>
          <name name-style="western">
            <surname>Kuo</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Mining health social media with sentiment analysis</article-title>
        <source>J Med Syst</source>  
        <year>2016</year>  
        <month>11</month>  
        <volume>40</volume>  
        <issue>11</issue>  
        <fpage>236</fpage>  
        <pub-id pub-id-type="doi">10.1007/s10916-016-0604-4</pub-id>
        <pub-id pub-id-type="medline">27663246</pub-id>
        <pub-id pub-id-type="pii">10.1007/s10916-016-0604-4</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Tapi</surname>
            <given-names>NMD</given-names>
          </name>
          <name name-style="western">
            <surname>Bringay</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Lavergne</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Mollevi</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Opitz</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>What patients can tell us: topic analysis for social media on breast cancer</article-title>
        <source>JMIR Med Inform</source>  
        <year>2017</year>  
        <month>07</month>  
        <day>31</day>  
        <volume>5</volume>  
        <issue>3</issue>  
        <fpage>e23</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://medinform.jmir.org/2017/3/e23/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/medinform.7779</pub-id>
        <pub-id pub-id-type="medline">28760725</pub-id>
        <pub-id pub-id-type="pii">v5i3e23</pub-id>
        <pub-id pub-id-type="pmcid">PMC5556259</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Carelle</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Piotto</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Bellanger</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Germanaud</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Thuillier</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Khayat</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Changing patient perceptions of the side effects of cancer chemotherapy</article-title>
        <source>Cancer</source>  
        <year>2002</year>  
        <month>07</month>  
        <day>1</day>  
        <volume>95</volume>  
        <issue>1</issue>  
        <fpage>155</fpage>  
        <lpage>63</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/resolve/openurl?genre=article&amp;sid=nlm:pubmed&amp;issn=0008-543X&amp;date=2002&amp;volume=95&amp;issue=1&amp;spage=155"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1002/cncr.10630</pub-id>
        <pub-id pub-id-type="medline">12115329</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sohl</surname>
            <given-names>SJ</given-names>
          </name>
          <name name-style="western">
            <surname>Schnur</surname>
            <given-names>JB</given-names>
          </name>
          <name name-style="western">
            <surname>Montgomery</surname>
            <given-names>GH</given-names>
          </name>
        </person-group>
        <article-title>A meta-analysis of the relationship between response expectancies and cancer treatment-related side effects</article-title>
        <source>J Pain Symptom Manage</source>  
        <year>2009</year>  
        <month>11</month>  
        <volume>38</volume>  
        <issue>5</issue>  
        <fpage>775</fpage>  
        <lpage>84</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19775863"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jpainsymman.2009.01.008</pub-id>
        <pub-id pub-id-type="medline">19775863</pub-id>
        <pub-id pub-id-type="pii">S0885-3924(09)00701-5</pub-id>
        <pub-id pub-id-type="pmcid">PMC2783563</pub-id></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Aslam</surname>
            <given-names>MS</given-names>
          </name>
          <name name-style="western">
            <surname>Naveed</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Ahmed</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Abbas</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Gull</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Athar</surname>
            <given-names>MA</given-names>
          </name>
        </person-group>
        <article-title>Side effects of chemotherapy in cancer patients and evaluation of patients opinion about starvation based differential chemotherapy</article-title>
        <source>J Cancer Ther</source>  
        <year>2014</year>  
        <volume>5</volume>  
        <issue>8</issue>  
        <fpage>817</fpage>  
        <lpage>22</lpage>  
        <pub-id pub-id-type="doi">10.4236/jct.2014.58089</pub-id></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Spasić</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Livsey</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Keane</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Nenadić</surname>
            <given-names>G</given-names>
          </name>
        </person-group>
        <article-title>Text mining of cancer-related information: review of current status and future directions</article-title>
        <source>Int J Med Inform</source>  
        <year>2014</year>  
        <month>09</month>  
        <volume>83</volume>  
        <issue>9</issue>  
        <fpage>605</fpage>  
        <lpage>23</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(14)00110-5"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2014.06.009</pub-id>
        <pub-id pub-id-type="medline">25008281</pub-id>
        <pub-id pub-id-type="pii">S1386-5056(14)00110-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
        <source>Reddit</source>  
        <access-date>2019-06-03</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.reddit.com/">https://www.reddit.com/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78r7x3juM"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Johnson</surname>
            <given-names>GJ</given-names>
          </name>
          <name name-style="western">
            <surname>Ambrose</surname>
            <given-names>PJ</given-names>
          </name>
        </person-group>
        <article-title>Neo-tribes: the power and potential of online communities in health care</article-title>
        <source>Commun ACM</source>  
        <year>2006</year>  
        <month>01</month>  
        <day>1</day>  
        <volume>49</volume>  
        <issue>1</issue>  
        <fpage>107</fpage>  
        <lpage>13</lpage>  
        <pub-id pub-id-type="doi">10.1145/1107458.1107463</pub-id></nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
        <source>RedditBlog</source>  
        <access-date>2019-06-03</access-date>
        <comment>Top Posts of 2013, Stats, and Snoo Year’s Resolutions 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://redditblog.com/2013/12/31/top-posts-of-2013-stats-and-snoo-years-resolutions/">https://redditblog.com/2013/12/31/top-posts-of-2013-stats-and-snoo-years-resolutions/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78r86wPbY"/></comment> </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
        <source>Subreddit Stats</source>  
        <access-date>2019-06-03</access-date>
        <comment>/r/cancer stats 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://subredditstats.com/r/cancer">https://subredditstats.com/r/cancer</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78r8J8oUb"/></comment> </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
        <source>SNOMED</source>  
        <access-date>2019-06-03</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.snomed.org/">http://www.snomed.org/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78rD4S5yd"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
        <source>National Center for Biotechnology Information</source>  
        <access-date>2019-06-03</access-date>
        <comment>Medical Subject Headings 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ncbi.nlm.nih.gov/mesh">https://www.ncbi.nlm.nih.gov/mesh</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78rDzIavh"/></comment> </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
        <source>Gene Ontology (GO) Knowledge Base</source>  
        <access-date>2019-06-03</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://geneontology.org/">http://geneontology.org/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78rEGtUWi"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
        <source>GenomeNet</source>  
        <access-date>2019-06-05</access-date>
        <comment>Kyoto Encyclopedia of Genes and Genomes (KEGG) disease database 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.genome.jp/kegg/disease/">https://www.genome.jp/kegg/disease/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78tdhhRi4"/></comment> </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
        <source>DrugBank</source>  
        <access-date>2019-06-03</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.drugbank.ca/">https://www.drugbank.ca/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78rFjahY6"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
        <source>National Library of Medicine</source>  
        <access-date>2019-06-03</access-date>
        <comment>Unified Medical Language System 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/research/umls/">https://www.nlm.nih.gov/research/umls/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78rG0jTjh"/></comment> </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
        <source>TSMM (Text &amp; Social Media Mining) Lab - Yonsei University</source>  
        <access-date>2019-06-03</access-date>
        <comment>PKDE4J 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://informatics.yonsei.ac.kr/pkde4j/">http://informatics.yonsei.ac.kr/pkde4j/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78rGLBh9h"/></comment> </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
        <source>Stanford CoreNLP</source>  
        <access-date>2019-06-03</access-date>
        <comment>Introduction to pipelines 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://stanfordnlp.github.io/CoreNLP/pipelines.html">https://stanfordnlp.github.io/CoreNLP/pipelines.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78rGV2Gpd"/></comment> </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Heo</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Kang</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>PKDE4J: Entity and relation extraction for public knowledge discovery</article-title>
        <source>J Biomed Inform</source>  
        <year>2015</year>  
        <month>10</month>  
        <fpage>320</fpage>  
        <lpage>32</lpage>  
        <pub-id pub-id-type="medline">26277115</pub-id></nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
        <source>Gephi - The Open Graph Viz Platform</source>  
        <access-date>2019-06-03</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://gephi.org/">https://gephi.org/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78rGm1XPj"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
        <source>Wiktionary</source>  
        <access-date>2019-06-03</access-date>
        <comment>Disease 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://en.wiktionary.org/wiki/disease">https://en.wiktionary.org/wiki/disease</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78rGytBCe"/></comment> </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zappavigna</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <source>Academia.edu</source>  
        <access-date>2019-06-03</access-date>
        <comment>Discourse of Twitter and Social Media: How we use language to create affiliation on the web 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.academia.edu/18311721/Discourse_of_Twitter_and_Social_Media_How_we_use_language_to_create_affiliation_on_the_web">https://www.academia.edu/18311721/Discourse_of_Twitter_and_Social_Media_How_we_use_language_to_create_affiliation_on_the_web</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78rH6DFwW"/></comment> </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Simard</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Savard</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Fear of Cancer Recurrence Inventory: development and initial validation of a multidimensional measure of fear of cancer recurrence</article-title>
        <source>Support Care Cancer</source>  
        <year>2009</year>  
        <month>03</month>  
        <volume>17</volume>  
        <issue>3</issue>  
        <fpage>241</fpage>  
        <lpage>51</lpage>  
        <pub-id pub-id-type="doi">10.1007/s00520-008-0444-y</pub-id>
        <pub-id pub-id-type="medline">18414902</pub-id></nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Groenvold</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Petersen</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Aaronson</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Arraras</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Blazeby</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Bottomley</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Fayers</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>de Graeff</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hammerlid</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Kaasa</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Sprangers</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Bjorner</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>EORTC QLQ-C15-PAL: the new standard in the assessment of health-related quality of life in advanced cancer?</article-title>
        <source>Palliat Med</source>  
        <year>2006</year>  
        <month>03</month>  
        <volume>20</volume>  
        <issue>2</issue>  
        <fpage>59</fpage>  
        <lpage>61</lpage>  
        <pub-id pub-id-type="medline">16613400</pub-id></nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yanez</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Pearman</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Lis</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Beaumont</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Cella</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>The FACT-G7: a rapid version of the functional assessment of cancer therapy-general (FACT-G) for monitoring symptoms and concerns in oncology practice and research</article-title>
        <source>Ann Oncol Off J Eur Soc Med Oncol</source>  
        <year>2013</year>  
        <month>04</month>  
        <volume>24</volume>  
        <issue>4</issue>  
        <fpage>1073</fpage>  
        <lpage>8</lpage>  
        <pub-id pub-id-type="medline">23136235</pub-id></nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cella</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Riley</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Stone</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Rothrock</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Reeve</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Yount</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Amtmann</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Bode</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Buysse</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Choi</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Cook</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Devellis</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>DeWalt</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Fries</surname>
            <given-names>JF</given-names>
          </name>
          <name name-style="western">
            <surname>Gershon</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Hahn</surname>
            <given-names>EA</given-names>
          </name>
          <name name-style="western">
            <surname>Lai</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Pilkonis</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Revicki</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Rose</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Weinfurt</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Hays</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Initial adult health item banks and first wave testing of the Patient-Reported Outcomes Measurement Information System (PROMIS™) network: 2005–2008</article-title>
        <source>J Clin Epidemiol</source>  
        <year>2010</year>  
        <month>11</month>  
        <volume>63</volume>  
        <issue>11</issue>  
        <fpage>1179</fpage>  
        <lpage>94</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20685078"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jclinepi.2010.04.011</pub-id>
        <pub-id pub-id-type="medline">20685078</pub-id>
        <pub-id pub-id-type="pii">S0895-4356(10)00173-3</pub-id>
        <pub-id pub-id-type="pmcid">PMC2965562</pub-id></nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Denecke</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Nejdl</surname>
            <given-names>W</given-names>
          </name>
        </person-group>
        <article-title>How valuable is medical social media data? Content analysis of the medical web</article-title>
        <source>Inf Sci</source>  
        <year>2009</year>  
        <month>05</month>  
        <day>30</day>  
        <volume>179</volume>  
        <issue>12</issue>  
        <fpage>1870</fpage>  
        <lpage>80</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.ins.2009.01.025</pub-id></nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Korkontzelos</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Nikfarjam</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Shardlow</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Sarker</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Ananiadou</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Gonzalez</surname>
            <given-names>GH</given-names>
          </name>
        </person-group>
        <article-title>Analysis of the effect of sentiment analysis on extracting adverse drug reactions from tweets and forum posts</article-title>
        <source>J Biomed Inform</source>  
        <year>2016</year>  
        <month>08</month>  
        <volume>62</volume>  
        <fpage>148</fpage>  
        <lpage>58</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(16)30050-8"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2016.06.007</pub-id>
        <pub-id pub-id-type="medline">27363901</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(16)30050-8</pub-id>
        <pub-id pub-id-type="pmcid">PMC4981644</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
