<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i12e18767</article-id>
      <article-id pub-id-type="pmid">33284127</article-id>
      <article-id pub-id-type="doi">10.2196/18767</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using Social Media Data to Understand Consumers' Information Needs and Emotions Regarding Cancer: Ontology-Based Data Analysis Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Erdley</surname>
            <given-names>W</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Brixey</surname>
            <given-names>Juliana</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Christiansen</surname>
            <given-names>Mats</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Jooyun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3156-6619</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Park</surname>
            <given-names>Hyeoun-Ae</given-names>
          </name>
          <degrees>PhD, FAAN, FACMI</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>College of Nursing</institution>
            <institution>Seoul National University</institution>
            <addr-line>103 Daehak-ro, Jongno-gu</addr-line>
            <addr-line>Seoul, 03080</addr-line>
            <country>Republic of Korea</country>
            <phone>82 2 740 8827</phone>
            <fax>82 2 766 1852</fax>
            <email>hapark@snu.ac.kr</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3770-4998</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Park</surname>
            <given-names>Seul Ki</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7441-8010</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Tae-Min</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3612-4350</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>College of Nursing</institution>
        <institution>Gachon University</institution>
        <addr-line>Incheon</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>College of Nursing</institution>
        <institution>Seoul National University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Health Management</institution>
        <institution>Sahmyook University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hyeoun-Ae Park <email>hapark@snu.ac.kr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>12</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>7</day>
        <month>12</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>12</issue>
      <elocation-id>e18767</elocation-id>
      <history>
        <date date-type="received">
          <day>17</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>6</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>11</day>
          <month>11</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Jooyun Lee, Hyeoun-Ae Park, Seul Ki Park, Tae-Min Song. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 07.12.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/12/e18767/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Analysis of posts on social media is effective in investigating health information needs for disease management and identifying people’s emotional status related to disease. An ontology is needed for semantic analysis of social media data.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study was performed to develop a cancer ontology with terminology containing consumer terms and to analyze social media data to identify health information needs and emotions related to cancer.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A cancer ontology was developed using social media data, collected with a crawler, from online communities and blogs between January 1, 2014 and June 30, 2017 in South Korea. The relative frequencies of posts containing ontology concepts were counted and compared by cancer type.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The ontology had 9 superclasses, 213 class concepts, and 4061 synonyms. Ontology-driven natural language processing was performed on the text from 754,744 cancer-related posts. Colon, breast, stomach, cervical, lung, liver, pancreatic, and prostate cancer; brain tumors; and leukemia appeared most in these posts. At the superclass level, risk factor was the most frequent, followed by emotions, symptoms, treatments, and dealing with cancer.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Information needs and emotions differed according to cancer type. The observations of this study could be used to provide tailored information to consumers according to cancer type and care process. Attention should be paid to provision of cancer-related information to not only patients but also their families and the general public seeking information on cancer.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>social media</kwd>
        <kwd>ontology</kwd>
        <kwd>cancer</kwd>
        <kwd>health information needs</kwd>
        <kwd>cancer information</kwd>
        <kwd>emotion</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Dealing with cancer is both physically and mentally difficult, and patients require information on not only cancer itself but also on how to live with cancer [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. There are time and situation constraints [<xref ref-type="bibr" rid="ref3">3</xref>] that can hinder fulfillment of these requirements by health care providers such as physicians and nurses. Moreover, such information needs cannot be met by family members owing to their lack of expertise [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
      <p>Health care consumers often use social media to exchange information, share experiences, and seek emotional support. They seek information from social media about diseases, treatments, and statistics to understand the disease and for help in making decisions. They also use social media to relieve anxiety and promote comfort by sharing their experiences and feelings arising from cancer [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. High-quality information is provided by cancer information portals, most of which are operated by government or professional societies. However, these portals are not designed to support sharing of experiences and feelings among patients. Therefore, consumers use social media to interact with each other by writing and reading about their shared experiences and feelings.</p>
      <p>People use social media to share opinions, perceptions, concerns, and worries about health conditions [<xref ref-type="bibr" rid="ref6">6</xref>]. Such posts have proven to be effective in identifying the interests and concerns of health care consumers related to the prevention, diagnosis, treatment, and management of diseases and the emotions related to diseases [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. In recent years, there have been many studies [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>] that extract health-related topics from social media data. The premise of these studies is that the topics posted on social media and their frequencies reflect the extent of consumers’ health information needs [<xref ref-type="bibr" rid="ref8">8</xref>]. A thorough understanding of these needs would be helpful in providing tailored information to consumers.</p>
      <p>Text clustering [<xref ref-type="bibr" rid="ref8">8</xref>] and machine learning [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>] have been widely used to extract health-related topics from social media data. Lu et al [<xref ref-type="bibr" rid="ref7">7</xref>] integrated medical terminology using the Unified Medical Language System to reflect the structure of medical knowledge in text clustering of messages posted by patients with lung cancer, breast cancer, and diabetes in online health communities. They were able to detect health-related topics effectively using this approach. The use of a specific ontology for the domain of interest is helpful in the effective identification of relevant topics from social media data.</p>
      <p>Although certain cancer-related ontologies, such as those for liver, breast, and gastric cancer, are available, they were developed with professional medical terms for data extraction from or integration with clinical databases [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>] and are not suitable for analyzing social media data posted in consumer terms. Therefore, it is necessary to develop an ontology with terminology containing consumer terms to analyze social media data posted by consumers.</p>
      <p>This study was conducted to develop a cancer ontology with terminology containing consumer terms and to analyze social media data to identify health information needs and emotions related to cancer.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>The study consisted of 2 stages: (1) development and evaluation of a cancer ontology, and (2) analysis of social media data using the ontology.</p>
      <sec>
        <title>Development and Evaluation of a Cancer Ontology</title>
        <p>Ontology development was performed based on previous reports by Noy and McGuinness [<xref ref-type="bibr" rid="ref16">16</xref>] and Jung et al [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>First, the domain and scope of the ontology were determined using the following competency questions: (1) What types of cancer are mentioned in posts on social media? (2) Which care delivery processes (eg, prevention, diagnosis, treatment) of cancer in general and specific cancer types are mentioned in social media posts? (3) What cancer-related topics are mentioned in posts for cancer in general and specific cancer types? (4) What emotions (ie, the range of feelings a patient with cancer can experience when dealing with cancer) are mentioned in posts for cancer in general and specific cancer types?</p>
        <p>The purpose of the ontology was determined as collecting and analyzing social media data to identify cancer information needs and emotions related to cancer.</p>
        <p>Second, existing ontologies on cancer were identified: the Liver Cancer Ontology [<xref ref-type="bibr" rid="ref13">13</xref>], Breast Cancer Ontology [<xref ref-type="bibr" rid="ref14">14</xref>], and Gastric Cancer Ontology [<xref ref-type="bibr" rid="ref15">15</xref>]. Each is limited to a specific type of cancer, and none includes consumer terms. Therefore, a new ontology was developed to include various types of cancer and consumer terms.</p>
        <p>An existing ontology on emotion was also identified—the Sentiment Ontology for Social Web [<xref ref-type="bibr" rid="ref18">18</xref>]. This ontology has top-level classes of emotion as positive, neutral, or negative. These top-level classes were too broad, and the second-level class was too detailed to describe a range of feelings a patient with cancer can experience when dealing with cancer. Therefore, a new ontology reflecting emotions accompanying cancer was deemed necessary.</p>
        <p>Third, terms extracted from the 3 existing cancer-related ontologies, the Sentiment Ontology for Social Web, cancer information portals, and social media posts related to cancer were enumerated.</p>
        <p>The cancer information portals that were reviewed to extract terms were 2 US websites (the National Cancer Institute [<xref ref-type="bibr" rid="ref19">19</xref>] and American Cancer Society [<xref ref-type="bibr" rid="ref20">20</xref>]), 1 UK website (Cancer Research UK [<xref ref-type="bibr" rid="ref21">21</xref>]), and 2 Korean websites (the National Cancer Information Center [<xref ref-type="bibr" rid="ref22">22</xref>] and National Health Information Portal by Korea Centers for Disease Control and Prevention [<xref ref-type="bibr" rid="ref23">23</xref>]).</p>
        <p>These portals included information on emotions accompanying cancer and how to manage them. Emotions included across portals were anger, guilt, and depression [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. In addition, the Cancer Research UK website [<xref ref-type="bibr" rid="ref21">21</xref>] included overwhelmed, denial, anxiety, fear, and sadness; the National Cancer Institute [<xref ref-type="bibr" rid="ref19">19</xref>] included overwhelmed, denial, anxiety, fear, sadness, loneliness, hope, and gratitude; and the Korean National Cancer Information Center [<xref ref-type="bibr" rid="ref22">22</xref>] included overwhelmed, denial, anxiety, hope, and gratitude.</p>
        <p>Natural language processing (NLP) was used to extract consumer terms from social media data on cancer. Terms with the same meaning as those extracted from existing cancer-related ontologies and cancer information portals or terms with a new meaning related to cancer were collected as consumer terms. These included (1) heteronyms such as “<italic>jol-eob</italic>” (<italic>graduation</italic> in English, meaning <italic>complete cure</italic>); (2) abbreviations such as “<italic>jaegeom</italic>” (an abbreviation of <italic>jaegeomsa</italic>, meaning <italic>re-test</italic> in English), “<italic>chompa</italic>” (an abbreviation of <italic>cho-eumpa</italic>, meaning <italic>ultrasound</italic> in English), “<italic>holmon</italic>” (an abbreviation of <italic>holeumon</italic>, meaning <italic>hormone</italic> in English); and (3) terms used for herbal medicine and complementary therapies.</p>
        <p>Fourth, the classes and their hierarchy and relationships were defined. The collected terms were grouped according to semantic meaning, and the resulting concepts were designated as classes with an independent existence. Hierarchies of the classes were designed based on the relationships of the concepts. The superclass and subclass concepts of the ontology were determined by analyzing the structures of the ontology. A list of synonyms was compiled for each class concept as a terminology presenting the relationship between the concept and synonyms in the ontology.</p>
        <p>Nine domains of cancer-related emotions were identified: <italic>overwhelmed, denial, anger, fear and anxiety, sadness and depression, guilt, loneliness, hope,</italic> and <italic>gratitude</italic>. Each domain was defined as a class concept, and a list of synonyms of the class concepts was mapped as a terminology.</p>
        <p>Fifth, the structure, correctness, and quality of the ontology were evaluated using the evaluation tool described below and by interviewing 3 domain experts: 2 professors of family medicine and 1 professor of bioinformatics.</p>
        <p>The tool consisted of 13 items selected from the studies of Hlomani and Stacey [<xref ref-type="bibr" rid="ref24">24</xref>] and Kehagias et al [<xref ref-type="bibr" rid="ref25">25</xref>]. The items were scored on a 5-point scale for structure (size, depth of hierarchy, breadth of hierarchy, balance, overall complexity, and connectivity between concepts), correctness (accuracy, completeness, conciseness, and consistency), and quality (computational efficiency, adaptability, and clarity). Interviews included open-ended questions that allowed experts to recommend revisions of the ontology. The ontology was revised based on the results of the evaluation.</p>
      </sec>
      <sec>
        <title>Analysis of Social Media Data</title>
        <sec>
          <title>Data Collection and Preparation</title>
          <p>The social media data for this study were posts on cancer collected using a crawler from online communities and blogs of 4 social media platforms in South Korea, namely <italic>Naver</italic>, <italic>Daum</italic>, <italic>Tistory</italic>, and <italic>Egloos</italic>, between January 1, 2014 and June 30, 2017.</p>
          <p>A total of 302 concepts and synonyms of the <italic>cancer type</italic> superclass were used as keywords for post extraction, and 418 concepts were used as stop keywords. For example, when certain Korean words or morphemes, such as “<italic>agseong</italic>” (meaning <italic>malignant</italic> in English) or “<italic>am</italic>” (meaning <italic>cancer</italic> in English) are combined with other words, the phrase could become a word or morpheme with a completely different meaning, such as “<italic>agseong virus</italic>” (meaning computer virus) or “<italic>an-am</italic>” (name of a district in Seoul). Posts containing 59 advertising keywords (eg, detoxification, antioxidant therapy, and enzyme therapy), suggesting an advertising post, were removed.</p>
          <p>A total of 754,744 posts were extracted from online communities and blog sites. When categorized by source into blogs and online communities, 442,669 (58.7%) were blog posts. Of the 754,744 posts, 234,118 were from 2014; 235,509 were from 2015; 200,553 were from 2016; and 84,564 were from the first half of 2017. Most of the posts (737,575; 97.7%) were from <italic>Naver</italic> and <italic>Daum</italic>, the 2 major social media platforms in South Korea.</p>
          <p>Next, ontology-based NLP was performed on the posts to extract class concepts.</p>
          <p>The data collection and NLP were carried out in collaboration with a Korean telecommunications company (Smart Insight). During NLP, identifying information (such as name, phone number, and account) was removed, and masked data were delivered to the research team.</p>
        </sec>
        <sec>
          <title>Frequency Analysis of Posts</title>
          <p>The unit of analysis was the post, and the frequencies of posts containing single specific class concepts were counted.</p>
          <p>First, the relative frequencies of posts containing specific cancer types were counted and compared with the national cancer statistics of Korea. Top-ranked cancer types in social media posts were selected for further analyses. Second, the relative frequencies of posts containing superclass concepts were counted and compared by cancer type. Finally, the frequencies of posts containing end-node class concepts were counted and organized by cancer type.</p>
          <p>None of the posts used in this study had any identifying information. The study was approved by the Institutional Review Board of Seoul National University (No. 1802/001-006).</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Development and Evaluation of the Ontology</title>
        <p>Based on the existing cancer-related ontologies and cancer information portals, 10 superclasses were identified: <italic>cancer type, prevention, diagnosis, treatment, prognosis</italic> (including <italic>recurrence</italic> and <italic>cure</italic>), <italic>risk factor, symptom, side effect, dealing with cancer,</italic> and <italic>emotion</italic>. The ontology consisted of concepts that represented care delivery processes and patient outcomes, such as <italic>prevention, diagnosis, treatment,</italic> and <italic>prognosis</italic>, and another set of concepts that represented how consumers managed, felt, perceived, and acted in their personal lives, such as <italic>risk factor, symptom, side effect, dealing with cancer,</italic> and <italic>emotion</italic>.</p>
        <p>The average scores by the 3 experts of 13 items designed to evaluate the structure, correctness, and quality of the ontology ranged from 4.33 to 5 on the 5-point scale. The score for correctness was the highest (mean 4.83), followed by structure (mean 4.67) and quality (mean 4.67). A suggestion made by one of the experts was to combine the superclass of <italic>side effect</italic> with that of <italic>symptom</italic>, moving cancer symptoms and treatment side effects into subclasses of the <italic>symptom</italic> superclass, because it is difficult to distinguish side effects from symptoms without context. Another suggestion was to add the national cancer support system as a subclass of the <italic>dealing with cancer</italic> superclass. The ontology was revised to reflect the comments made by the experts.</p>
        <p>The revised ontology had 9 superclasses, 213 class concepts, and 4061 synonyms. It had 3 to 4 levels of hierarchy, with 36 first-level subclasses and 41 second-level subclasses (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Cancer-related ontology superclasses.</p>
          </caption>
          <graphic xlink:href="jmir_v22i12e18767_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Analysis of Social Media Data</title>
        <sec>
          <title>Comparison of Top-Ranked Cancer Types</title>
          <p><xref rid="figure2" ref-type="fig">Figure 2</xref> presents the top-ranked cancer types mentioned on social media compared with national cancer statistics. Colon cancer (47,940/754,744, 6.4%) was the most frequently mentioned on social media, followed by breast cancer (47,235/754,744, 6.3%) and stomach cancer (37,378/754,744, 5.0%). The 4 highest-ranked cancers according to the national cancer statistics were stomach, colon, thyroid, and lung cancer.</p>
          <p>Colon, breast, stomach, lung, liver, prostate, and pancreatic cancer were within the top 10 rankings in both social media data and the national cancer statistics. Cervical cancer, leukemia, and brain tumors, which ranked within the top 10 cancer types in the social media data, were not included in the top 10 cancer types in the national cancer statistics. Thyroid cancer ranked within the top 3 in the national cancer statistics but was not included in the top-ranked cancer types on social media.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Comparison of top-ranked cancer types in social media posts and national cancer statistics.</p>
            </caption>
            <graphic xlink:href="jmir_v22i12e18767_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Frequency of Posts Containing Superclass-Level Concepts</title>
          <p>In terms of the frequency of posts at the superclass level, <italic>risk factor</italic> was the most frequent, appearing in 42.5% (320,568/754,744) of the posts, followed by <italic>emotion</italic> in 33.8% (254,920/754,744), <italic>symptom</italic> in 32.2% (243,010/754,744), <italic>treatment</italic> in 30.2% (227,942/754,744), <italic>dealing with cancer</italic> in 29.4% (221,996/754,744), <italic>diagnosis</italic> in 23.7% (178,498/754,744), <italic>prevention</italic> in 14.4% (108,408/754,744), and <italic>prognosis</italic> in 9.4% of the posts (70,583/754,744).</p>
          <sec>
            <title>Risk Factor Superclass</title>
            <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows the relative frequencies of posts containing <italic>risk factor</italic> superclass concepts for 10 specific cancer types. <italic>Health condition-related risk factor</italic> class concepts (such as related disease and health status) were the most common in most cancer types, followed by <italic>demographic</italic>, <italic>lifestyle</italic> (such as diet and exercise), <italic>environmental</italic>, and <italic>hereditary risk factor</italic> concepts.</p>
            <p><italic>Demographic risk factor</italic> class concepts appeared more frequently in posts on breast cancer, cervical cancer, leukemia, and prostate cancer than in those on other types of cancer. <italic>Lifestyle-related risk factor</italic> class concepts were more frequent in posts on colon, stomach, and lung cancer than in those on other cancer types. <italic>Environmental risk factor</italic> class concepts were more frequent in posts mentioning lung, liver, and prostate cancer than in those on other cancer types.</p>
            <fig id="figure3" position="float">
              <label>Figure 3</label>
              <caption>
                <p>Relative frequencies of the risk factor superclass by top 10 cancer types in social media posts.</p>
              </caption>
              <graphic xlink:href="jmir_v22i12e18767_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Care Process of Cancer: Prevention, Diagnosis, Treatment, Recurrence, and Cure Superclasses</title>
            <p><xref rid="figure4" ref-type="fig">Figure 4</xref> shows the relative frequencies of posts containing superclass concepts related to the cancer care process by 10 specific cancer types. Posts containing <italic>treatment</italic> class concepts were the most common, followed by those related to <italic>diagnosis, prevention</italic>, <italic>recurrence,</italic> and <italic>cure</italic> class concepts for most cancer types, except for cervical cancer.</p>
            <p><italic>Prevention</italic> class concepts were more frequent in posts on cervical, stomach, colon, and prostate cancer and less frequent in posts on brain tumors and leukemia. The <italic>diagnosis</italic> class was dominant in cervical cancer posts, appearing more frequently than the <italic>treatment</italic> class. The <italic>recurrence</italic> class was the least frequently mentioned in leukemia posts, in which the <italic>cure</italic> class was common.</p>
            <fig id="figure4" position="float">
              <label>Figure 4</label>
              <caption>
                <p>Relative frequencies of superclass concepts related to the care process of cancer by top 10 cancer types in social media posts.</p>
              </caption>
              <graphic xlink:href="jmir_v22i12e18767_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Symptom Superclass</title>
            <p><xref rid="figure5" ref-type="fig">Figure 5</xref> shows the relative frequencies of posts containing <italic>symptom</italic> superclass concepts by 10 specific cancer types. <italic>Digestive symptom</italic> class concepts were the most common in posts on all cancer types, followed by <italic>psychological symptom</italic> concepts.</p>
            <p><italic>Digestive symptom</italic> class concepts were predominant in posts related to cancers of the digestive system, such as colon, stomach, and pancreatic cancer. <italic>Psychological and neurological symptom</italic> class concepts appeared more frequently in posts on brain tumors than in those on other cancers. <italic>Metabolic symptom</italic> class concepts were frequent in posts on pancreatic cancer, and <italic>sexual and reproductive symptom</italic> class concepts were frequent in posts on cervical and prostate cancer.</p>
            <fig id="figure5" position="float">
              <label>Figure 5</label>
              <caption>
                <p>Relative frequencies of the symptom superclass by top 10 cancer types in social media posts.</p>
              </caption>
              <graphic xlink:href="jmir_v22i12e18767_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Dealing With Cancer Superclass</title>
            <p><xref rid="figure6" ref-type="fig">Figure 6</xref> shows the relative frequencies of posts containing <italic>dealing with cancer</italic> superclass concepts by 10 specific cancer types.</p>
            <p><italic>Daily life</italic> class (involving diet and exercise) concepts were predominant in posts on the 10 cancer types. The most noticeable class concept in posts on <italic>daily life</italic> was <italic>diet</italic>. <italic>Leisure</italic> class concepts (involving sex life, travel, and driving) were more common in posts on liver and breast cancer.</p>
            <fig id="figure6" position="float">
              <label>Figure 6</label>
              <caption>
                <p>Relative frequencies of the dealing with cancer superclass by top 10 cancer types in social media posts.</p>
              </caption>
              <graphic xlink:href="jmir_v22i12e18767_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Emotion Superclass</title>
            <p><xref rid="figure7" ref-type="fig">Figure 7</xref> shows the relative frequencies of posts containing <italic>emotion</italic> superclass concepts by 10 specific cancer types. <italic>Fear/anxiety</italic> class concepts were the most common, followed by <italic>hope</italic> and <italic>sadness/depression</italic> concepts, in posts on all cancer types except for liver cancer and brain tumors.</p>
            <p><italic>Hope</italic> class concepts were more frequently mentioned than <italic>fear/anxiety</italic> class concepts in posts on liver cancer. <italic>Overwhelmed</italic> class concepts were more frequent in posts on brain tumors and pancreatic cancer than in those on other cancers. <italic>Gratitude</italic> and <italic>guilt</italic> class concepts were more frequently mentioned in posts on leukemia than in those on other cancers.</p>
            <fig id="figure7" position="float">
              <label>Figure 7</label>
              <caption>
                <p>Relative frequencies of the emotion superclass by top 10 cancer types in social media posts.</p>
              </caption>
              <graphic xlink:href="jmir_v22i12e18767_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
        </sec>
        <sec>
          <title>Frequencies of Posts Containing Class-Level Concepts by Cancer Type</title>
          <p><xref rid="figure8" ref-type="fig">Figure 8</xref> shows the top-ranked end-node class concepts in order of relative frequency by cancer type.</p>
          <p>The <italic>dealing with diet</italic> class was ranked first in posts related to 8 cancer types. The dominant concepts and terms appearing in posts about <italic>dealing with diet</italic> were food, protein, fruit, vitamin, meal, vegetable, and nutrition.</p>
          <fig id="figure8" position="float">
            <label>Figure 8</label>
            <caption>
              <p>Top ranked end-node class concepts by cancer type.</p>
            </caption>
            <graphic xlink:href="jmir_v22i12e18767_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Class concepts related to the care process of cancer were frequently highly ranked; the <italic>unspecified treatment</italic> class was highly ranked for all types of cancer. The <italic>surgery</italic> class was ranked within the top 10 class concepts in posts on colon, breast, stomach, and liver cancer, and brain tumors. The <italic>chemotherapy</italic> class was ranked within the top 10 class concepts in posts on breast, stomach, lung, liver, and pancreatic cancer, and leukemia.</p>
          <p>Several risk factor–related class concepts ranked in the top 10 class concepts, but the order differed by cancer type. <italic>Gender</italic> and <italic>age</italic> classes frequently appeared in posts on breast, cervical, and prostate cancer, and leukemia. The <italic>dietary risk factor</italic> class frequently appeared in posts on colon, stomach, and prostate cancer, and the <italic>obesity</italic> class frequently appeared in posts on colon cancer. The <italic>chemical risk factor</italic> class frequently appeared in posts on lung, liver, and prostate cancer, and leukemia. The <italic>infectious agent</italic> class (such as viruses) frequently appeared in posts on cervical cancer.</p>
          <p>No posts on thyroid cancer contained <italic>support group/support community</italic> class concepts, but these concepts appeared in posts on all of the other top 10 cancers.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, an ontology containing consumer terms was developed to collect and analyze social media data to identify health information needs and emotions related to cancer. The ontology has the following characteristics.</p>
        <p>First, compared to other cancer ontologies, this ontology covers more cancer types and more comprehensive topics, including the care process describing the interactions of health care providers and patients, risk factors that consumers have, emotions that consumers feel related to cancer, symptoms that consumers perceive, and the lifestyles that consumers lead.</p>
        <p>Second, the ontology has a terminology component that presents consumer terms to analyze social media posts about cancer, including synonyms, heteronyms, and abbreviated expressions.</p>
        <p>A total of 754,744 cancer-related posts on social media were collected. The 6-month collection of 2017 posts was less than half that of the full-year collection of the previous year. Monthly analysis showed cancer-related social media posts were more frequent in the second half of the year than in the first half of the year. In particular, the number of posts in February 2014 was almost twice the monthly average, which was consistent with the search results from Google Trends. According to Google Trends, the cancer diagnosis and death from cancer of 2 Korean celebrities had been announced at that time.</p>
        <p>Social media data were analyzed using NLP with ontology concepts and terms to identify consumers’ information needs. The frequencies of topics in social media data may indirectly reflect consumers’ information needs, as they frequently post on topics in which they are interested or about which they have concerns [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>Thyroid cancer, which ranked in the top 3 in the national cancer statistics, was not included in the top-ranked cancer types on social media. This finding is similar to that of Buis and Whitten [<xref ref-type="bibr" rid="ref26">26</xref>], showing that the information needs related to cancers with a low survival rate are higher than those for cancers with a high survival rate. The 5-year survival rate for thyroid cancer in Korea was about 100% in 2013-2017, compared to 70.4% for all types of cancer [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        <p>Frequency analysis by superclass revealed the highest frequency of posts on social media related to <italic>risk factor</italic> (320,568/754,744, 42.47%), followed by <italic>emotion</italic> (254,920/754,744, 33.78%), <italic>symptom</italic> (243,010/754,744, 32.20%), and <italic>treatment</italic> (227,942/754,744, 30.20%). These findings can be compared to those of 2 previous studies: Lu et al [<xref ref-type="bibr" rid="ref7">7</xref>] clustered posts of patients with lung and breast cancer and extracted symptoms, examinations, and treatments (drugs, procedures) as dominant topics. Cho et al [<xref ref-type="bibr" rid="ref1">1</xref>] performed a qualitative content analysis of Q &#38; A posts of patients with breast cancer, and extracted treatment, physical condition, and lifestyle/self-care as dominant topics. They also found that 75% of the information requests included expressions of emotion [<xref ref-type="bibr" rid="ref1">1</xref>]. Although these previous studies [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] focused mainly on the stages after diagnosis, such as treatment, examinations, and physical condition, our results revealed that the majority of consumers’ information needs involved risk factors. Also, previous studies [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] used data collected from cancer community posts by patients with specific cancer types, whereas this study used data collected from social media posts by the general public. Social media data may include concerns of the public about cancer risk factors. In addition, as the ontology developed in this study covered comprehensive topics, including risk factors and emotions, this study had a more diverse focus than those of previous studies [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>Consistent with the findings of the 2 previous studies [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], the most common emotions related to the top 10 cancers in this study were <italic>fear/anxiety, hope</italic>, and <italic>sadness/depression</italic>. Cho et al [<xref ref-type="bibr" rid="ref1">1</xref>] reported that anxiety/worry, gratitude, fear, and sadness were frequent in posts by Korean women with breast cancer, and Freedman et al [<xref ref-type="bibr" rid="ref9">9</xref>] reported that fears, anxiety, denial, and depression were frequent emotions cited on treatment in posts by patients with breast cancer. However, among positive emotions, gratitude was included in the top 3 in the study by Cho et al [<xref ref-type="bibr" rid="ref1">1</xref>], whereas hope was ranked in the top 3 in our study. The question and answer board in Cho et al's study [<xref ref-type="bibr" rid="ref1">1</xref>] included more posts on gratitude toward health care providers, in contrast to social media posts in this study that often included posts on hope for a cure by patients and their families.</p>
        <p>Frequency analysis by the end-node class level indicated that <italic>dealing with diet</italic> ranked among the top-class concepts for most types of cancer. This finding was consistent with those of previous information-need studies [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref27">27</xref>] in Korean patients with breast cancer. Cho et al [<xref ref-type="bibr" rid="ref1">1</xref>] reported that patients asked many questions related to diet, and Kim and Hur [<xref ref-type="bibr" rid="ref27">27</xref>] reported high information-need scores for diet. These findings reflect the importance of diet in disease management that is perceived by the Korean population. Concepts and terms such as food, protein, fruit, vitamin, meal, vegetable, and nutrition were top ranked in the posts collected in this study, demonstrating consumers’ information needs related to a healthy diet.</p>
        <p>This study also compared the frequencies of superclass concepts and end-node class concepts by 10 types of cancer.</p>
        <p>Regarding the care process of cancer, <italic>treatment</italic> was the most frequently mentioned for all types of cancer, except cervical cancer. <italic>Diagnosis</italic> and <italic>prevention</italic> appeared more frequently in posts on cervical cancer. In Korea, the national cancer screening program for cervical cancer is recommended from 20 years of age, whereas breast and gastric cancer screenings are recommended from 40 years of age, and colorectal cancer screening is recommended from 50 years of age. As the opportunity to become interested in screening for cervical cancer comes earlier than for other cancers, the active use of social media by young women may have resulted in greater numbers of posts related to diagnosis and prevention.</p>
        <p>The frequencies of risk factor–related class concepts, especially <italic>diet, chemical, tobacco, obesity,</italic> and <italic>infectious agent</italic>, differed between cancer types. Previous research findings have indicated that differences in post frequency on cancer topics reflect the different information needs for each cancer type [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Therefore, tailored information on risk factors should be provided according to consumers’ needs by cancer type. In addition, tailored information on cancer can be provided according to the consumers’ specific risk factors.</p>
        <p>Posts on leukemia contained a higher rate of feelings of <italic>guilt</italic>. According to the National Cancer Information Center [<xref ref-type="bibr" rid="ref22">22</xref>] data, leukemia has a higher incidence in children than other types of cancer do. In many cases, posts related to leukemia were likely written by the parents of patients rather than by the patients themselves. This finding suggests that emotional management is necessary for not only patients with cancer but also their family members and friends.</p>
        <p>Based on these findings, we suggest what information should be provided and how it can be provided. These suggestions would help information providers, namely clinicians and portals operated by government or professional societies, improve care for patients with cancer by providing relevant information based on consumers’ information needs.</p>
        <p>First, it is necessary to ensure that sufficient information on risk factors is provided to the public. Not only do information needs increase after cancer is diagnosed, but they are also high for risk factor management. Thus, information on risk factors should be provided depending on the concerns of consumers.</p>
        <p>Second, consumers’ high information needs regarding healthy diets were noteworthy. Qualified information on diet should be provided to patients with cancer. Collaboration with a nutritionist would be effective in providing tailored nutritional information for each cancer type according to the needs of patients.</p>
        <p>Third, in general, most information portals provide information through the same organization and the same flow of information for all types of cancer. Tailored information can be provided according to cancer type and the characteristics of the consumer, such as age, gender, and risk exposure. It is possible to make it easier to access the information that consumers want using keyword visualization or navigation. Different navigation routes could be applied according to cancer type. In addition, applying different types of visualization could improve the convenience of consumers before or after the cancer diagnosis.</p>
        <p>Finally, providing information and emotional support are not separate but, instead, coexist. It is necessary to provide reliable information and management for emotional care so that people do not rely on only family and caregivers for emotional support. One possible approach is to combine the functions of online support groups with information portals to provide emotional support.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study had some limitations. Only the top 10 cancer types were analyzed. Future studies should analyze data on other types of cancer. The social media posts were made by not only patients with cancer but also caregivers and the general public. However, these populations could not be distinguished because identifying information of the consumers was not collected. Further research is needed to collect social media data with an identification algorithm to distinguish the status of the authors of the posts and to provide tailored information.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>This study was performed to develop a cancer ontology with terminology containing consumer terms to collect and analyze social media data. The ontology consisted of 9 superclasses (<italic>cancer type, prevention, diagnosis, treatment, prognosis, risk factor, symptom, dealing with cancer,</italic> and <italic>emotion</italic>), 213 classes, and 4061 synonyms with consumer-generated terms. It used 9 emotional classes (<italic>overwhelmed, denial, anger, fear and anxiety, sadness and depression, guilt, loneliness, hope,</italic> and <italic>gratitude</italic>) to investigate emotional status in the social media data on cancer.</p>
        <p>This ontology, containing comprehensive cancer-related topics, enabled identification and comparison of consumer interests and concerns about risk factors, dealing with cancer, and emotions as well as the care process in social media data. The results of this study showed that information needs and emotions differ according to cancer type. These observations could be used to provide tailored information to consumers according to the cancer type and care process. Care for patients with cancer can be improved by providing relevant information based on consumers' information needs.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by grants from the National Research &#38; Development Program for Cancer Control, Ministry of Health &#38; Welfare, Republic of Korea (No.1720250) and the Gachon University research fund of 2018 (GCU-2018-0292).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Roter</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Guallar</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Noh</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>DE</given-names>
            </name>
          </person-group>
          <article-title>Needs of women with breast cancer as communicated to physicians on the Internet</article-title>
          <source>Support Care Cancer</source>
          <year>2011</year>
          <month>01</month>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>113</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1007/s00520-009-0794-0</pub-id>
          <pub-id pub-id-type="medline">21290791</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hasson-Ohayon</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Tuval-Mashiach</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Goldzweig</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Levi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pizem</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>The need for friendships and information: Dimensions of social support and posttraumatic growth among women with breast cancer</article-title>
          <source>Palliat Support Care</source>
          <year>2016</year>
          <month>08</month>
          <volume>14</volume>
          <issue>4</issue>
          <fpage>387</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.1017/S1478951515001042</pub-id>
          <pub-id pub-id-type="medline">26462622</pub-id>
          <pub-id pub-id-type="pii">S1478951515001042</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mazor</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Beard</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Alexander</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Arora</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Firneno</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gaglio</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Greene</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Lemay</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Roblin</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Walsh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Street</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Gallagher</surname>
              <given-names>TH</given-names>
            </name>
          </person-group>
          <article-title>Patients' and family members' views on patient-centered communication during cancer care</article-title>
          <source>Psychooncology</source>
          <year>2013</year>
          <month>11</month>
          <volume>22</volume>
          <issue>11</issue>
          <fpage>2487</fpage>
          <lpage>95</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23780672"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/pon.3317</pub-id>
          <pub-id pub-id-type="medline">23780672</pub-id>
          <pub-id pub-id-type="pmcid">PMC3808529</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corter</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Speller</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sequeira</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Facey</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Baxter</surname>
              <given-names>NN</given-names>
            </name>
          </person-group>
          <article-title>What young women with breast cancer get versus what they want in online information and social media supports</article-title>
          <source>J Adolesc Young Adult Oncol</source>
          <year>2019</year>
          <month>06</month>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>320</fpage>
          <lpage>328</lpage>
          <pub-id pub-id-type="doi">10.1089/jayao.2018.0125</pub-id>
          <pub-id pub-id-type="medline">30648917</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kemp</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Koczwara</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Butow</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Girgis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schofield</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hulbert-Williams</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Levesque</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Spence</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Vatandoust</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kichenadasse</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sukumaran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Karapetis</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fitzgerald</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beatty</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Online information and support needs of women with advanced breast cancer: a qualitative analysis</article-title>
          <source>Support Care Cancer</source>
          <year>2018</year>
          <month>10</month>
          <volume>26</volume>
          <issue>10</issue>
          <fpage>3489</fpage>
          <lpage>3496</lpage>
          <pub-id pub-id-type="doi">10.1007/s00520-018-4206-1</pub-id>
          <pub-id pub-id-type="medline">29693203</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00520-018-4206-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>On</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis of social media on childhood vaccination: development of an ontology</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>06</month>
          <day>07</day>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>e13456</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/6/e13456/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13456</pub-id>
          <pub-id pub-id-type="medline">31199290</pub-id>
          <pub-id pub-id-type="pii">v21i6e13456</pub-id>
          <pub-id pub-id-type="pmcid">PMC6592483</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Health-related hot topic detection in online communities using text clustering</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <month>01</month>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>e56221</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0056221"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0056221</pub-id>
          <pub-id pub-id-type="medline">23457530</pub-id>
          <pub-id pub-id-type="pii">PONE-D-12-27785</pub-id>
          <pub-id pub-id-type="pmcid">PMC3574139</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>Exploring online support spaces: using cluster analysis to examine breast cancer, diabetes and fibromyalgia support groups</article-title>
          <source>Patient Educ Couns</source>
          <year>2012</year>
          <month>05</month>
          <volume>87</volume>
          <issue>2</issue>
          <fpage>250</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pec.2011.08.017</pub-id>
          <pub-id pub-id-type="medline">21930359</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(11)00468-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Freedman</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Viswanath</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Vaz-Luis</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Keating</surname>
              <given-names>NL</given-names>
            </name>
          </person-group>
          <article-title>Learning from social media: utilizing advanced data extraction techniques to understand barriers to breast cancer treatment</article-title>
          <source>Breast Cancer Res Treat</source>
          <year>2016</year>
          <month>07</month>
          <volume>158</volume>
          <issue>2</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1007/s10549-016-3872-2</pub-id>
          <pub-id pub-id-type="medline">27339067</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10549-016-3872-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alpers</surname>
              <given-names>GW</given-names>
            </name>
            <name name-style="western">
              <surname>Winzelberg</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Classen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dev</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Koopman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Barr Taylor</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of computerized text analysis in an internet breast cancer support group</article-title>
          <source>Computers in Human Behavior</source>
          <year>2005</year>
          <month>03</month>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>361</fpage>
          <lpage>376</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2004.02.008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Silva</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ranasinghe</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bandaragoda</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Adikari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mills</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Iddamalgoda</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Alahakoon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lawrentschuk</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Persad</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Osipov</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bolton</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Machine learning to support social media empowered patients in cancer care and cancer treatment decisions</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <month>10</month>
          <volume>13</volume>
          <issue>10</issue>
          <fpage>e0205855</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0205855"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0205855</pub-id>
          <pub-id pub-id-type="medline">30335805</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-17935</pub-id>
          <pub-id pub-id-type="pmcid">PMC6193663</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bandaragoda</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ranasinghe</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Adikari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>de Silva</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lawrentschuk</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Alahakoon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Persad</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bolton</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The Patient-Reported Information Multidimensional Exploration (PRIME) framework for investigating emotions and other factors of prostate cancer patients with low intermediate risk based on online cancer support group discussions</article-title>
          <source>Ann Surg Oncol</source>
          <year>2018</year>
          <month>02</month>
          <day>21</day>
          <volume>25</volume>
          <issue>6</issue>
          <fpage>1737</fpage>
          <lpage>1745</lpage>
          <pub-id pub-id-type="doi">10.1245/s10434-018-6372-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaur</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Khamparia</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Diagnosis of liver cancer ontology using SPARQL</article-title>
          <source>International Journal of Applied Engineering Research</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>69</issue>
          <fpage>15</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/280740853_Diagnosis_of_liver_cancer_ontology_using_SPARQL/link/55c4eaeb08aeca747d6182b4/download"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jusoh</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Othman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Omar</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Development of breast cancer ontology based on hybrid approach</article-title>
          <source>International Journal of Innovative Computing</source>
          <year>2013</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.11113/ijic.v3n1.32</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mahmoodi</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Mirzaie</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mahmoudi</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>A new algorithm to extract hidden rules of gastric cancer data based on ontology</article-title>
          <source>Springerplus</source>
          <year>2016</year>
          <volume>5</volume>
          <fpage>312</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27066344"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s40064-016-1943-9</pub-id>
          <pub-id pub-id-type="medline">27066344</pub-id>
          <pub-id pub-id-type="pii">1943</pub-id>
          <pub-id pub-id-type="pmcid">PMC4786510</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noy</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>McGuinness</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Ontology development 101: A guide to creating your first ontology</article-title>
          <source>Stanford University</source>
          <year>2001</year>
          <access-date>2019-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://protege.stanford.edu/publications/ontology_development/ontology101.pdf">https://protege.stanford.edu/publications/ontology_development/ontology101.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Ontology-based approach to social data sentiment analysis: detection of adolescent depression signals</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>07</month>
          <day>24</day>
          <volume>19</volume>
          <issue>7</issue>
          <fpage>e259</fpage>
          <pub-id pub-id-type="doi">10.2196/jmir.7452</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <article-title>Sentiment Ontology for Social Web</article-title>
          <source>Telecommunications Technology Association</source>
          <year>2013</year>
          <access-date>2019-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://committee.tta.or.kr/summary/standard_view.jsp?publish_year=2013&#38;section=1&#38;pk_num=TTAK.KO-10.0639/R1&#38;nowSu=38&#38;section_code=">https://committee.tta.or.kr/summary/standard_view.jsp?publish_year=2013&#38;section=1&#38;pk_num=TTAK.KO-10.0639/R1&#38;nowSu=38&#38;section_code=</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <article-title>Comprehensive cancer information</article-title>
          <source>National Cancer Institute</source>
          <access-date>2019-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cancer.gov/">https://www.cancer.gov/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>Information and resources about cancer</article-title>
          <source>American Cancer Society</source>
          <access-date>2019-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cancer.org/">https://www.cancer.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <article-title>Together we will beat cancer</article-title>
          <source>Cancer Research UK</source>
          <access-date>2019-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cancerresearchuk.org">https://www.cancerresearchuk.org</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <article-title>National cancer information center</article-title>
          <source>National Cancer Center</source>
          <access-date>2019-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cancer.go.kr/">https://www.cancer.go.kr/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <article-title>National health information portal</article-title>
          <source>Korea Centers for Disease Control and Prevention</source>
          <access-date>2019-12-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://health.cdc.go.kr">https://health.cdc.go.kr</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hlomani</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stacey</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Approaches, methods, metrics, measures, and subjectivity in ontology evaluation: a survey</article-title>
          <source>Semantic Web Journal</source>
          <year>2014</year>
          <volume>1</volume>
          <issue>5</issue>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.semantic-web-journal.net/content/approaches-methods-metrics-measures-and-subjectivity-ontology-evaluation-survey"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kehagias</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Papadimitriou</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Hois</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tzovaras</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bateman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A methodological approach for ontology evaluation and refinement</article-title>
          <source>ASK-IT Final Conference</source>
          <year>2008</year>
          <conf-name>ASK-IT Final Conference</conf-name>
          <conf-date>June, 2008</conf-date>
          <conf-loc>Nuremberg, Germany</conf-loc>
          <fpage>1</fpage>
          <lpage>13</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buis</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Whitten</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Comparison of social support content within online communities for high- and low-survival-rate cancers</article-title>
          <source>Comput Inform Nurs</source>
          <year>2011</year>
          <month>08</month>
          <volume>29</volume>
          <issue>8</issue>
          <fpage>461</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1097/NCN.0b013e318214093b</pub-id>
          <pub-id pub-id-type="medline">21876394</pub-id>
          <pub-id pub-id-type="pii">00024665-201108000-00011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hur</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Information needs on patients with cancer in Korea</article-title>
          <source>The Journal of Korean Academic Society of Adult Nursing</source>
          <year>2002</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>135</fpage>
          <lpage>143</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.koreascience.or.kr/article/JAKO200225458752257.page"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
