<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i5e15589</article-id>
      <article-id pub-id-type="pmid">32452808</article-id>
      <article-id pub-id-type="doi">10.2196/15589</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>An Exploration Into the Use of a Chatbot for Patients With Inflammatory Bowel Diseases: Retrospective Cohort Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kocaballi</surname>
            <given-names>Ahmet</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gabashvili</surname>
            <given-names>Irene</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>del Hoyo Francisco</surname>
            <given-names>Javier</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Zand</surname>
            <given-names>Aria</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Vatche and Tamar Manoukian Division of Digestive Diseases</institution>
            <institution>UCLA Center for Inflammatory Bowel Diseases</institution>
            <institution>David Geffen School of Medicine, University of California at Los Angeles</institution>
            <addr-line>10945 Le Conte Avenue</addr-line>
            <addr-line>Los Angeles, CA, 90095</addr-line>
            <country>United States</country>
            <phone>1 3102065403</phone>
            <email>azand89@gmail.com</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5157-9380</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Sharma</surname>
            <given-names>Arjun</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7575-200X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Stokes</surname>
            <given-names>Zack</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8081-3041</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Reynolds</surname>
            <given-names>Courtney</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0298-1039</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Montilla</surname>
            <given-names>Alberto</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5097-9083</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Sauk</surname>
            <given-names>Jenny</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6731-8193</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Hommes</surname>
            <given-names>Daniel</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1559-1179</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Vatche and Tamar Manoukian Division of Digestive Diseases</institution>
        <institution>UCLA Center for Inflammatory Bowel Diseases</institution>
        <institution>David Geffen School of Medicine, University of California at Los Angeles</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Digestive Diseases</institution>
        <institution>Leiden University Medical Center</institution>
        <addr-line>Leiden</addr-line>
        <country>Netherlands</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Cisco Systems Inc</institution>
        <institution>Collaboration Technology Group</institution>
        <addr-line>Dallas, TX</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Aria Zand <email>azand89@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>26</day>
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>5</issue>
      <elocation-id>e15589</elocation-id>
      <history>
        <date date-type="received">
          <day>22</day>
          <month>7</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>3</day>
          <month>10</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>27</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>3</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Aria Zand, Arjun Sharma, Zack Stokes, Courtney Reynolds, Alberto Montilla, Jenny Sauk, Daniel Hommes. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 26.05.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/5/e15589/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The emergence of chatbots in health care is fast approaching. Data on the feasibility of chatbots for chronic disease management are scarce.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to explore the feasibility of utilizing natural language processing (NLP) for the categorization of electronic dialog data of patients with inflammatory bowel diseases (IBD) for use in the development of a chatbot.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Electronic dialog data collected between 2013 and 2018 from a care management platform (<italic>UCLA eIBD</italic>) at a tertiary referral center for IBD at the University of California, Los Angeles, were used. Part of the data was manually reviewed, and an algorithm for categorization was created. The algorithm categorized all relevant dialogs into a set number of categories using NLP. In addition, 3 independent physicians evaluated the appropriateness of the categorization.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 16,453 lines of dialog were collected and analyzed. We categorized 8324 messages from 424 patients into seven categories. As there was an overlap in these categories, their frequencies were measured independently as symptoms (2033/6193, 32.83%), medications (2397/6193, 38.70%), appointments (1518/6193, 24.51%), laboratory investigations (2106/6193, 34.01%), finance or insurance (447/6193, 7.22%), communications (2161/6193, 34.89%), procedures (617/6193, 9.96%), and miscellaneous (624/6193, 10.08%). Furthermore, in 95.0% (285/300) of cases, there were minor or no differences in categorization between the algorithm and the three independent physicians.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>With increased adaptation of electronic health technologies, chatbots could have great potential in interacting with patients, collecting data, and increasing efficiency. Our categorization showcases the feasibility of using NLP in large amounts of electronic dialog for the development of a chatbot algorithm. Chatbots could allow for the monitoring of patients beyond consultations and potentially empower and educate patients and improve clinical outcomes.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>chatbots</kwd>
        <kwd>inflammatory bowel diseases</kwd>
        <kwd>eHealth</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>telehealth</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Recent technological advances have allowed for artificial intelligence (AI) to successfully integrate itself into many aspects of daily life. Besides implementation in voice bots such as Amazon’s Alexa and Apple’s Siri, AI is also utilized to predict financial stock market changes and answer student questions in educational settings [<xref ref-type="bibr" rid="ref1">1</xref>]. In health care, AI is expected to disrupt the role of physicians as well; however, experts predict that AI will support the intelligence and knowledge base of physicians rather than replace them entirely [<xref ref-type="bibr" rid="ref2">2</xref>]. For instance, AI can utilize deep-learning algorithms, which function like the neural networks of the brain and distinguish patterns, to recognize certain types of brain tumors, vascular conditions, or pneumonia on imaging scans and prioritize these cases in the workflow of a radiologist [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. In addition, AI can be used to quickly review patient scans and rule out certain diagnoses, thereby increasing the efficiency and accuracy of a radiologist [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
        <p>Another significant way AI can augment health care delivery is through medical chatbots. A chatbot, or chatterbot, attempts to simulate a natural conversation with a human user [<xref ref-type="bibr" rid="ref4">4</xref>]. Medical chatbots are already being implemented into regular practice: the Insomnobot-3000 helps insomniacs get through the night, and the Endurance bot acts as a companion for dementia patients [<xref ref-type="bibr" rid="ref5">5</xref>]. In addition, there are significant efforts toward the development of diagnostic chatbots. Some popular ones include Your.MD, Buoy Health, Sensely, Infermedica, and Florence (<xref ref-type="table" rid="table1">Table 1</xref>) [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>Although there are limited data on these general medical chatbots in clinical practice, some independent bodies have provided preliminary and positive results in tests with more specific medical chatbots [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>Most chatbots utilize natural language processing (NLP), which can be simply defined as the use of computers for analyzing human language [<xref ref-type="bibr" rid="ref9">9</xref>]. One application of NLP relies on human identification of key elements within an event or situation that might constitute a useful summary of a given document or dataset [<xref ref-type="bibr" rid="ref10">10</xref>]. Recently, there have been growing trends toward the use of electronic health records (EHRs). Multiple studies have attempted to use NLP to extract useful information from EHRs. In one study, researchers used NLP to identify patients with ulcerative colitis and Crohn disease from EHR data collected from Massachusetts General Hospital and Brigham and Women’s Hospital [<xref ref-type="bibr" rid="ref11">11</xref>]. The study developed an algorithm that partly relied on recognizing keywords associated with ulcerative colitis or Crohn disease to analyze the narrative texts and was verified via comparison to a physician’s review and classification of the same narrative texts [<xref ref-type="bibr" rid="ref11">11</xref>]. Ultimately, the study determined that NLP of patient narrative texts provided a more accurate means of identifying patients who had ulcerative colitis and Crohn disease than previous models that had relied on reviewing billing codes [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>In another study by the University of Alabama, researchers developed an algorithm that analyzed the EHRs of patients collected over 3 years and organized the EHRs into pathology clusters based on key terms [<xref ref-type="bibr" rid="ref12">12</xref>]. This team also concluded that electronic text mining of health records, or NLP, is an effective method for analyzing large health care datasets [<xref ref-type="bibr" rid="ref12">12</xref>]. More recent studies have even attempted to use NLP models to study the semantics and sentence flows found in clinical narrative data [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. The literature shows that it is common to perform exploratory analysis on natural language data to understand the topics and vocabulary of a specific domain in health care [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. This exploration is often done by grouping keywords and categorizing topics or using open-source technology such as clinical Text Analysis and Knowledge Extraction [<xref ref-type="bibr" rid="ref13">13</xref>]. A deep initial understanding facilitates the creation and comparison of more complex, health care-focused NLP models. However, it is worth noting that certain aspects of patient consultations in clinical settings, such as electronic record style, patient behavior, and physician experience, can vary from clinic to clinic [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. This variability found within patient data puts limits on what NLP can do without a large and diverse sample.</p>
        <p>In addition, despite the extensive literature on the topic, there seems to be a lack of research into the use of NLP to analyze raw consultation dialog data of patients with specific chronic conditions such as inflammatory bowel diseases (IBD). The organization of the patient with IBD to health care provider (HCP) dialog is likely to be distinct from a general patient population due to the complex nature of the disease. Understanding how these dialogs can be organized is an important first step in assessing the feasibility of a chatbot for this population.</p>
        <p>Chatbots that utilize NLP can help to improve the way health care is delivered in multiple ways. For one, they improve accessibility to health care for patients outside of clinics and hospitals. From kids to the elderly, patients often need care outside of inpatient consultations; lack of such support is associated with inefficiency, high health care costs, and burdened HCPs [<xref ref-type="bibr" rid="ref15">15</xref>]. With a chatbot, these patients would have immediate and autonomous support at home.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Overview of current medical chatbots.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="210"/>
            <col width="150"/>
            <col width="330"/>
            <col width="310"/>
            <thead>
              <tr valign="top">
                <td>Name</td>
                <td>Disease area</td>
                <td>Objective</td>
                <td>What does it do</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Your.MD (UK<sup>a</sup>)</td>
                <td>General</td>
                <td>Provide reliable information for common symptoms and recommend relevant resources</td>
                <td>Safely advises patients based on symptoms described in an app-based messaging system</td>
              </tr>
              <tr valign="top">
                <td>Endurance (Russia)</td>
                <td>Dementia</td>
                <td>Act as a companion for patients with short-term memory loss and help to identify signs of worsening patient condition</td>
                <td>It works via voice recognition to ask questions and react to answers. It can speak on a variety of topics and pull interesting news from Google</td>
              </tr>
              <tr valign="top">
                <td>Insomnobot-3000 (US<sup>b</sup>)</td>
                <td>Insomnia</td>
                <td>Act as a companion for insomniacs when they are awake at night</td>
                <td>Has conversations with patients via text</td>
              </tr>
              <tr valign="top">
                <td>Pharmabot (Philippines)</td>
                <td>Pediatrics</td>
                <td>Designed to help pediatric patients get appropriate generic medicine for certain ailments</td>
                <td>The system works in a software application that sets particular guidelines for interaction with the chatbot</td>
              </tr>
              <tr valign="top">
                <td>Text-based healthcare chatbots on Mobile Coach (Switzerland)</td>
                <td>Childhood obesity</td>
                <td>Provide a peer character for obese teenagers and keep them engaged. In addition, sought to show the benefit of text-based chatbot interventions in health care</td>
                <td>Works in a text channel within an app interface. Also, has predefined answer options for more efficient chat interactions</td>
              </tr>
              <tr valign="top">
                <td>Molly by Sensely (US)</td>
                <td>General</td>
                <td>Diagnose patients with common ailments appropriately based on symptoms</td>
                <td>Advises patients based on symptoms described in an app-based messaging system</td>
              </tr>
              <tr valign="top">
                <td>Buoy Health (US)</td>
                <td>General</td>
                <td>Diagnose patients accurately based on symptoms. Harvard team developed the algorithm for this bot using 18,000 medical papers for data</td>
                <td>Program asks a series of questions—for which there are predefined choices to choose from—to appropriately advise patient. Found on a Web-based software</td>
              </tr>
              <tr valign="top">
                <td>Symptomate by Infermedica (Poland)</td>
                <td>General</td>
                <td>Attempt to increase health care provider efficiency, reduce costs, and improve patient flow by acting as a general symptom checker</td>
                <td>Online software that collects and analyzes symptom data via predefined questions with answers to provide appropriate response</td>
              </tr>
              <tr valign="top">
                <td>Florence (Germany)</td>
                <td>General</td>
                <td>Acts as a <italic>personal nurse</italic> that can remind patients to take prescriptions and keep track of user’s health (weight, mood, etc)</td>
                <td>Advises patients based on symptoms described in an app via Facebook messenger</td>
              </tr>
              <tr valign="top">
                <td>Ada (international)</td>
                <td>General</td>
                <td>Help patients actively manage health based on common symptoms</td>
                <td>Ada poses simple and relevant questions to patients and then compares their symptoms with thousands of similar cases to help provide possible explanations</td>
              </tr>
              <tr valign="top">
                <td>Holly by Nimblr (US)</td>
                <td>N/A<sup>c</sup></td>
                <td>Helps patients schedule and reschedule appointments to help prevent no shows or cancellations and improve patient experience</td>
                <td>Interacts with patients via text and Amazon’s Alexa to update electronic health records</td>
              </tr>
              <tr valign="top">
                <td>Woebot (US)</td>
                <td>Psychiatry</td>
                <td>Make mental health care more accessible to people around the world</td>
                <td>Uses methods from cognitive behavioral therapy to help patients think through situations. It also includes intelligent mood tracking</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>UK: United Kingdom.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>US: United States.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>The primary objective of this study was to accurately categorize large datasets of electronic messages between patients with IBD and HCPs using natural language processing (NLP) to assess the feasibility of developing a medical chatbot for patients with IBD.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Design and Population</title>
        <p>In this study, we aimed to assess the feasibility of utilizing NLP on historical electronic messaging data of patients with IBD for use in the development of a medical chatbot. As IBD is a chronic illness characterized by severe and recurring abdominal pain and diarrhea, patients require frequent contact with their physicians and care team to monitor these alternating disease states and potential relapses [<xref ref-type="bibr" rid="ref16">16</xref>]. There is great potential here for a chatbot as patients need frequent monitoring beyond regular consultations, which is often troublesome due to the complex nature of the disease and a busy care team.</p>
        <p>Patients enrolled in the University of California, Los Angeles (UCLA) Center for IBD electronic care management platform (UCLA eIBD) were retrospectively assessed. The UCLA eIBD platform is a care management software as a service with a Web-based platform for providers that includes treatment decision support, business intelligence, messaging functionality, and performance improvement tools. On the patient’s side, there is a mobile app that includes care management insight, educational modules, surveys, and messaging (<xref rid="figure1" ref-type="fig">Figure 1</xref>) [<xref ref-type="bibr" rid="ref16">16</xref>]. Retrospective dialog data between patients and their care team from 2013 until 2018 were extracted, and the feasibility of applying NLP categorization algorithms was assessed.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of UCLA eIBD platform. AI: artificial intelligence; API: application programming interface.</p>
          </caption>
          <graphic xlink:href="jmir_v22i5e15589_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>All patients gave informed consent to participate. This study was approved by the Institutional Review Board (IRB) at UCLA with IRB protocol number 17-001208.</p>
      </sec>
      <sec>
        <title>Data Collection and Anonymization</title>
        <p>The dialogs were extracted from the UCLA eIBD database. The data consisted of the following: (1) a unique identifier, (2) first name, (3) last name, (4) date and time of message, (5) direction of message (HCP to patient or vice versa), (6) message content, (7) potential attachments, (8) HCP classification (urgent and nonurgent), (9) HCP action (responded yes or no), and (10) HCP response message content (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The data were anonymized by removing the first and last names; for identification, we made use of the unique identifier in our analysis.</p>
      </sec>
      <sec>
        <title>Categorization Method: Use of Natural Language Processing</title>
        <p>Once the patient to HCP dialogs were stored in a Microsoft Excel sheet, the first 400 lines within the sheet were manually analyzed to identify relevant categories for use in our NLP algorithm. To verify that the first 400 lines were representative, an additional 400 lines were randomly selected and manually reviewed as well (by AS and ZS). The analysis consisted of reading over each line to find an intent; if a particular intent was seen to occur frequently in these first lines, it was noted as a relevant category. The rationale behind using only categories observed in the sample was to make sure that the categories coded for were relevant to what the patient sample was discussing with their HCPs. Furthermore, 2 IBD gastroenterologists reviewed the categories found from the sample and reaffirmed that each category was representative of the IBD patient conversations they had encountered through electronic channels such as email. The same first 400 lines were then used to identify which keywords could assign a given dialog to a certain category (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). If a term appeared roughly 10 or more times in a given category, it was noted as a potential keyword; 2 physicians then reviewed and approved our list of terms. Using these keywords, we employed a simplified, rule-based bag-of-words model to assign each line of dialog to the appropriate categories (<xref rid="figure2" ref-type="fig">Figure 2</xref>). The bag-of-words model essentially allows one to extract particular features of a text, that is, keywords, and score them with relevant numbers for modeling, or in our case, categorization [<xref ref-type="bibr" rid="ref17">17</xref>]. Specifically, each line was converted into a standard bag-of-words with a score for each word in the form of a count of the number of times it appeared within the line. With stop words removed, we extracted the score of each keyword from all lines and assigned to each line all categories for which any one keyword had a positive score.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Flowchart of inclusion and categorization. N/A: not applicable.</p>
          </caption>
          <graphic xlink:href="jmir_v22i5e15589_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Enhancement and Correlation Assessment</title>
        <p>On the basis of the preliminary results, the keywords of our initial categorization algorithm were refined, and new categories were created if necessary. If the categorization was not logical, we analyzed which keywords in the model miscategorized the dialog and made the necessary improvements. In addition, any uncategorized lines of dialogs were assigned a category, and their keywords were identified. The categorization algorithm was updated with the new, physician-approved keywords extracted from the uncategorized lines of dialog and the improvements of the existing categorization.</p>
        <p>Once the code was refined to capture all the lines of dialog, a heat map was generated to showcase the overlap in categories, which refers to one line of dialog from a patient falling into two categories. It is worth noting that more than two categories could overlap, but there was no way to represent the higher levels of overlap in a relevant and concise diagram such as a heat map. The goal was to paint a picture of what types of questions or concerns popped up together, which is instrumental in the actual development of a chatbot and creation of multicategory scenarios.</p>
      </sec>
      <sec>
        <title>Validation of Accuracy</title>
        <p>The accuracy of our categorization algorithm was tested by having 3 independent physicians from the UCLA Division of Digestive Diseases (AZ, CR, and DH) evaluate the appropriateness of the categorization. Each physician was assigned to categorize 100 randomly collected lines of dialog using the defined corresponding category number. In addition, the physicians categorized each line in the same style as the algorithm: numerical order with no spaces.</p>
        <p>Once each of the doctors had finished categorizing the lines, the results were compared with the algorithm’s categorization. We showcased the extent to which the algorithm and the doctors agreed or disagreed. To do this, the number of underclassifications and overclassifications the categorization algorithm made relative to the doctors’ categories was calculated. For instance, if the algorithm missed a category that the doctor had, it would be counted as an underclassification of 1; if the category code had an extra category compared with the doctor, it would be counted as an overclassification of 1. We then created a bar chart based on these data. In addition, to understand the practicality of treating the doctors’ assessments as ground truth, we computed the level of agreement between the three raters using Krippendorff alpha. This is a standard estimate of inter-rater reliability across ratings on a nominal scale.</p>
        <p>To calculate a metric for the accuracy of the algorithm itself, we opted to use a nonstandard method of computing the success of the classification algorithm in an attempt to incorporate expert knowledge about the severity of misclassifications. As standard reliability measures such as Krippendorff alpha treat all disagreements between the raters and the algorithm with equal weight, we would not get a realistic view of the algorithm’s strength across the spectrum of categories by following this approach. This was also done in an attempt to avoid aggregating our multiclass labels from the raters, as doing so would put us at risk of destroying the variability in the ratings and inflating performance.</p>
      </sec>
      <sec>
        <title>Software</title>
        <p>Excel 2010 and the RStudio programming tool (R 3.4.0) were used for our analysis and algorithm creation (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data and Population Characteristics</title>
        <p>Our sample consisted of 424 patients, 3 physicians, 3 nurses, and 2 administrative assistants with 16,453 lines of electronic dialog. Of the dialogs, 8324 lines were sent by 424 patients to their HCP (patient to HCP). Our analyzed patient cohort is 51.9% (220/424) female, 50.7% (215/424) have Crohn disease, and 46.9% (199/424) have ulcerative colitis with a mean disease duration of 13.4 (SD 10.4) years. The majority of the population is of the white (284/424, 67.0%) race and not of Hispanic or Latino ethnicity (386/424, 91.0%). Furthermore, most of the patients are employed (283/424, 66.7%) and have been enrolled in the care program for a mean of 4.6 (SD 1.3) years (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Characteristics of the inclusion cohort (N=424).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="500"/>
            <col width="470"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Variable</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Age (years), mean (SD)</td>
                <td>42 (14)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Gender, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>220 (51.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>204 (48.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Disease type, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Crohn disease</td>
                <td>215 (50.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ulcerative colitis</td>
                <td>199 (46.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Indeterminate colitis</td>
                <td>10 (2.4)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Disease duration (years), mean (SD)</td>
                <td>13.4 (10.4)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Race, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White</td>
                <td>284 (67.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unknown</td>
                <td>97 (22.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian</td>
                <td>26 (6.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black or African American</td>
                <td>12 (2.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>American Indian or Alaska Native</td>
                <td>4 (0.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Native Hawaiian</td>
                <td>1 (0.2)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Ethnicity, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not Hispanic or Latino</td>
                <td>386 (91.0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic or Latino</td>
                <td>29 (6.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unknown</td>
                <td>9 (2.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Employment, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="2">
                  <break/>
                </td>
                <td>Employed</td>
                <td>283 (66.7)</td>
              </tr>
              <tr valign="top">
                <td>Unemployed or unknown</td>
                <td>141 (33.3)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Duration in program (years), mean (SD)</td>
                <td>4.6 (1.3)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Algorithm Development and Initial Results</title>
        <p>In our manual run-through of the first 400 out of the 8324 lines of dialog, we categorized them into six newly created and distinct categories: (1) medications, (2) symptoms, (3) appointments, (4) laboratory investigations, (5) finance/insurance, and (6) miscellaneous (lines that did not fall into any of the other categories). When the additional randomly generated 400 lines were reviewed for clarification, the same five relevant categories were found. At this point, we also kept a not applicable (N/A) section for automated responses produced by the mobile app itself that were in the dataset. For instance, “Patient has indicated there are no changes to medications.”</p>
        <p>We identified what keywords were relevant to each of the categories (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). A categorization algorithm (bags-of-words model) was created based on the keywords extracted from the dialogs in the categories and applied to categorize the remaining lines of dialog.</p>
        <p>Out of the 8324 lines of dialogs, the algorithm initially returned symptoms (1781/8324, 21.40% lines), medications (2114/8324, 25.40% lines), appointments (1781/8324, 21.40% lines), laboratory investigations (1648/8324, 19.80% lines), finance or insurance (358/8324, 4.30% lines), miscellaneous (2830/8324, 34.00% lines), and N/A (666/8324, 8.00% lines).</p>
      </sec>
      <sec>
        <title>Enhancement of Natural Language Processing Categorization Algorithm</title>
        <p>The miscellaneous section (2828/8317, 34.00% lines) was manually reviewed for 200 lines. The miscellaneous section was essentially randomly generated in that it was not organized by any dialog identifier, such as medical record number or patient name; it was simply the arbitrarily leftover dialogs from our initial run of the algorithm. As the dialogs here were short and not dominated by any one patient, we found it appropriate to review the first 200 lines as an accurate representation of the larger section. On review, two additional categories were identified within it: communications and procedures. In addition, the miscellaneous category was analyzed for keywords that would improve the scope of our initial categories. For instance, there were some medications we missed in our first test, such as Tylenol, that we were able to find upon review of the miscellaneous section and add as a keyword for medications. Furthermore, we removed keywords from the algorithm that were too general and inflated certain categories, such as the keyword <italic>take</italic> for the medications category.</p>
        <p>Finally, the categorization algorithm was enhanced to remove dialog that only contained generic greetings, such as <italic>Thank you</italic> or <italic>Hello</italic>, and the automated responses from the N/A section from the dataset so that they did not affect the final counts. After this enhancement, 2131 lines were excluded and 6193 lines of dialog were left for categorization.</p>
      </sec>
      <sec>
        <title>Final Natural Language Processing Categorization Results</title>
        <p>These refinements ultimately led to the algorithm yielding 32.83% (2033/6193) of the dialog relating to symptoms, 38.70% (2397/6193) to medications, 24.51% (1518/6193) to appointments, 34.01% (2106/6193) to laboratory investigations, 7.22% (447/6193) to finance or insurance, 34.89% (2161/6193) to communications, 9.96% (617/6193) to procedures, and 10.08% (624/6193) being miscellaneous (<xref ref-type="table" rid="table3">Table 3</xref>). Because a single line of dialog could fall into more than one category, the frequency of this overlap was measured for each possible pair combination of the categories and is displayed in a heat map (<xref rid="figure3" ref-type="fig">Figure 3</xref>). For instance, medications and symptoms appeared more together than they did on their own, as did communications and symptoms. Similarly, procedures and finance were very rarely brought up on their own (<xref rid="figure3" ref-type="fig">Figure 3</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Final categorization results (N=6193).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>Percentage of total sample<sup>a</sup>, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Symptoms</td>
                <td>2033 (32.83)</td>
              </tr>
              <tr valign="top">
                <td>Medications</td>
                <td>2397 (38.70)</td>
              </tr>
              <tr valign="top">
                <td>Appointments</td>
                <td>1518 (24.51)</td>
              </tr>
              <tr valign="top">
                <td>Laboratory investigations</td>
                <td>2106 (34.01)</td>
              </tr>
              <tr valign="top">
                <td>Finance or insurance</td>
                <td>447 (7.22)</td>
              </tr>
              <tr valign="top">
                <td>Communications</td>
                <td>2161 (34.89)</td>
              </tr>
              <tr valign="top">
                <td>Procedures</td>
                <td>617 (9.96)</td>
              </tr>
              <tr valign="top">
                <td>Miscellaneous</td>
                <td>624 (10.08)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>These percentages represent how frequently these categories occur in the sample of dialogs. As the categories mostly overlap in the dialogs, the percentages do not add up to 100%.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Heat map of category overlaps in dialog. This map shows the frequency of category overlap in pairs and how often the categories occurred by themselves out of the 6193 dialogs. Note: across the diagonal, the map is a mirror of itself.</p>
          </caption>
          <graphic xlink:href="jmir_v22i5e15589_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Validation of Natural Language Processing Accuracy</title>
        <p>Three independent raters (AZ, DH, and CR) categorized 100 random lines of dialog, and their categorization was compared with our algorithm’s. The raters categorized in the exact style of the algorithm, so if the categories were symptoms, appointments, and medications, they would write <italic>123</italic>. Applying Krippendorff alpha to these assessment ratings, we get an estimate of 0.61, indicating that there was moderate-to-high agreement between the doctors.</p>
        <p>In our underclassification and overclassification representation of the chatbot’s accuracy, we found that most of the errors were pooled at one difference, suggesting that the code and the doctors had a high level of agreement on most of the dialogs. Furthermore, the graph we constructed shows that the category code tended to overclassify rather than underclassify the subjects of the dialogs (<xref ref-type="table" rid="table4">Table 4</xref>). As one can see from the table, there is a significant drop in the instances of two or more underclassifications, with four to five missed categories having a frequency of 0 (<xref ref-type="table" rid="table4">Table 4</xref>). When we accounted for the 1 to 2 overclassification differences and the one category underclassification differences as minor, we found that 285 of the 300 tests had the program and physicians reasonably agreeing on categories. This meant that our code showed minor to no differences in 95% (285/300) of cases.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Accuracy test results.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="380"/>
            <col width="320"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Number of categories added or missed by the algorithm in a given line</td>
                <td>Instances in sample for overclassification, n</td>
                <td>Instances in sample for underclassification, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>71</td>
                <td>47</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>29</td>
                <td>5</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>5</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>3</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>1</td>
                <td>0</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We were successful in categorizing large amounts of electronic messages between patients and providers into a reasonable number of categories (&#60;10). Roughly 90.00% (5574/6193) of dialogs that came from patients fell into only seven categories, which shows potential for developing a chatbot with an NLP algorithm that can handle most IBD patients’ questions and concerns. In addition, our heat map gave us insight into how these categories correlate with each other in the dialogs. In terms of chatbot development, this map allows a developer to be aware of what categories or topics tend to appear together in dialogs between patients with IBD and HCPs. This insight would allow the developer to better prepare the chatbot’s NLP algorithm to identify topic transitions in a patient conversation and respond appropriately. In addition, our accuracy test supported the reliability of this result. Most of the differences recorded in our test (100/162, 61.7%) were simply due to the code overclassifying with one or two categories, but it rarely missed the primary intent (<xref ref-type="table" rid="table4">Table 4</xref>). Even when it did miss a category relative to the physician, the program was not necessarily incorrect upon review. For instance, one of the dialogs in the accuracy sample had a patient describing their symptoms or medications and subtly mentioning their laboratory investigations as their <italic>previous averages</italic>. Although the doctors recognized this and appropriately categorized the line as symptoms, medications, and laboratory investigations, the algorithm categorized it as symptoms and medications only, as averages was not a keyword we had programmed for laboratory investigations. Despite this, the program correctly identified the primary intent of the dialog, which is why we considered these types of differences minor in measuring the accuracy of our program.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>One limitation of this study is that our patient sample is fairly homogeneous, consisting of mostly young (mean age 42 years) and white patients, which limits the generalizability of our results to other populations. In addition, most of the patients in the study are employed, which could have potentially changed the types of questions or concerns they expressed and the overall category distribution relative to other patient populations. It is also worth noting that we used the expert opinions of 2 IBD gastroenterologists to support the validity of the categories chosen and the selected keywords. This may affect the reproducibility of our results.</p>
      </sec>
      <sec>
        <title>Comparisons With Prior Work</title>
        <p>The next step from collecting data to developing a chatbot is to use machine learning methods to model the relationship between questions and responses [<xref ref-type="bibr" rid="ref18">18</xref>]. Many chatbot knowledge bases (the database from which a chatbot draws its responses) are hand constructed, which is time consuming and reduces the algorithm’s versatility [<xref ref-type="bibr" rid="ref19">19</xref>]. For instance, Artificial Linguistic Internet Computer Entity and <italic>ELIZA</italic>, two classic chatbots, utilize hand-constructed databases to generate a response that matches a given human input [<xref ref-type="bibr" rid="ref20">20</xref>]. As an alternative, some developers have attempted to extract high-quality dialog data from online discussion forums to efficiently create a knowledge base for specific domain chatbots [<xref ref-type="bibr" rid="ref19">19</xref>]. The purpose of collecting these dialog datasets is to give the chatbot a training ground to learn how to accurately respond to a specific domain of human input responses with minimal human fine tuning, or simply put: machine learning [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. This machine learning approach also allows for the chatbot to continue learning through its interactions and improve its accuracy. Microsoft’s Xiaoice chatbot has successfully applied this model and has already amassed a following of about 660 million online users [<xref ref-type="bibr" rid="ref22">22</xref>]. When assessing the appropriateness of our data for actual chatbot development, our code could be distributed and tested in other centers with the same historical data without requiring much customization and would eliminate the need for hand-constructed databases.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Looking at the global trends of technology in health care, usage of smartphones and electronic health apps is on the rise [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Patient-provider communication through electronic messaging apps is becoming the standard. In our population, 24.51% (1518/6193) of messages were related to appointments. A chatbot could effectively automate requests regarding booking and cancellations or even play an instrumental part in triage, following the same guidelines as nurses, saving the provider team valuable time that could be redistributed to better patient care. The benefit is that a chatbot is available at all times, can handle tremendous amounts of conversation, and has no wait times.</p>
        <p>Through the UCLA eIBD platform, we have already created a high-quality knowledge base of human dialogs that can be used to train an IBD chatbot using NLP. We showcased that it is feasible to categorize large amounts of electronic messaging data in one of the most complex chronic conditions into a reasonable number of categories. Given the feasibility of this categorization and the potential benefits of a chatbot, the next step would be to develop a chatbot and test it in a patient population with IBD. Further studies are required to showcase the effect on patients, providers, and costs and potential extrapolation to other chronic conditions.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Section 1, Table of dialog data content.</p>
        <media xlink:href="jmir_v22i5e15589_app1.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Section 2, Table of keywords for categories.</p>
        <media xlink:href="jmir_v22i5e15589_app2.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Section 3, Copy of natural language processing algorithm code.</p>
        <media xlink:href="jmir_v22i5e15589_app3.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">HCP</term>
          <def>
            <p>health care provider</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">IBD</term>
          <def>
            <p>inflammatory bowel diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">IRB</term>
          <def>
            <p>Institutional Review Board</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">N/A</term>
          <def>
            <p>not applicable</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">UCLA</term>
          <def>
            <p>University of California, Los Angeles</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="con">
        <p>AZ was responsible for the study concept and design, analysis and interpretation of data, drafting of the manuscript, critical revision of the manuscript for important intellectual content, statistical analysis, and study supervision. AS was responsible for acquisition of data, drafting of the manuscript, statistical analysis, and critical revision of the manuscript for important intellectual content. ZS performed the statistical analysis and interpretation of the data. CR was responsible for the analysis and interpretation of data and the critical revision of the manuscript for important intellectual content. AM performed the revision of the manuscript for important intellectual content. JS was involved in the analysis and interpretation of data and the revision of the manuscript for important intellectual content. DH was in charge of the study concept and design, analysis and interpretation of the data, drafting of the manuscript, critical revision of the manuscript for important intellectual content, and the study supervision.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Becoming Human: Artificial Intelligence Magazine</source>
          <year>2018</year>
          <access-date>2018-10-17</access-date>
          <comment>4 Industries Artificial Intelligence Is Transforming<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://becominghuman.ai/4-industries-artificial-intelligence-is-transforming-fe27b750769b">https://becominghuman.ai/4-industries-artificial-intelligence-is-transforming-fe27b750769b</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kerschberg</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>Forbes Magazine</source>
          <year>2018</year>
          <access-date>2018-10-17</access-date>
          <comment>How Real-Time AI is Accelerating the Disruption of Healthcare (Interview with Nuance Communications)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.forbes.com/sites/benkerschberg/2018/03/19/__trashed-3/#15a28317530f">https://www.forbes.com/sites/benkerschberg/2018/03/19/__trashed-3/#15a28317530f</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brownlee</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Machine Learning Mastery</source>
          <year>2019</year>
          <access-date>2018-10-17</access-date>
          <comment>What is Deep Learning?<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://machinelearningmastery.com/what-is-deep-learning/">https://machinelearningmastery.com/what-is-deep-learning/</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pearl</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Forbes Magazine</source>
          <year>2018</year>
          <access-date>2018-10-15</access-date>
          <comment>Artificial Intelligence In Healthcare: Separating Reality From Hype<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.forbes.com/sites/robertpearl/2018/03/13/artificial-intelligence-in-healthcare/#417555031d75">https://www.forbes.com/sites/robertpearl/2018/03/13/artificial-intelligence-in-healthcare/#417555031d75</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shewan</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>WordStream: Online Advertising Made Easy</source>
          <year>2020</year>
          <access-date>2018-11-15</access-date>
          <comment>10 of the Most Innovative Chatbots on the Web<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.wordstream.com/blog/ws/2017/10/04/chatbots">https://www.wordstream.com/blog/ws/2017/10/04/chatbots</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sennaar</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <source>Emerj-Artificial Intelligence Research and Insight</source>
          <year>2019</year>
          <access-date>2018-10-15</access-date>
          <comment>Chatbots for Healthcare–Comparing 5 Current Applications<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://emerj.com/ai-application-comparisons/chatbots-for-healthcare-comparison/">https://emerj.com/ai-application-comparisons/chatbots-for-healthcare-comparison/</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kowatsch</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nißen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shih</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Rüegger</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Volland</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Filler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Künzler</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Barata</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Haug</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Büchter</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brogle</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Heldt</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gindrat</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Farpour-Lambert</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>l'Allemand</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Text-Based Healthcare Chatbots Supporting Patient and Health Professional Teams: Preliminary Results of a Randomized Controlled Trial on Childhood Obesity</article-title>
          <source>17th International Conference on Intelligent Virtual Agents</source>
          <year>2017</year>
          <conf-name>IVA'17</conf-name>
          <conf-date>August 27-30, 2017</conf-date>
          <conf-loc>Stockholm, Sweden</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Comendador</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Francisco</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Medenilla</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Nacion</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Serac</surname>
              <given-names>TB</given-names>
            </name>
          </person-group>
          <article-title>Pharmabot: a pediatric generic medicine consultant chatbot</article-title>
          <source>J Automation Control Eng</source>
          <year>2015</year>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>137</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.12720/joace.3.2.137-140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Iroju</surname>
              <given-names>OG</given-names>
            </name>
            <name name-style="western">
              <surname>Olaleke</surname>
              <given-names>JO</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of natural language processing in healthcare</article-title>
          <source>Int J Info Technol Comput Sci</source>
          <year>2015</year>
          <month>07</month>
          <day>8</day>
          <volume>7</volume>
          <issue>8</issue>
          <fpage>44</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.5815/ijitcs.2015.08.07</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Liddy</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Bradley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wheatley</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Modeling public health interventions for improved access to the gray literature</article-title>
          <source>J Med Libr Assoc</source>
          <year>2005</year>
          <month>10</month>
          <volume>93</volume>
          <issue>4</issue>
          <fpage>487</fpage>
          <lpage>94</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/16239945"/>
          </comment>
          <pub-id pub-id-type="medline">16239945</pub-id>
          <pub-id pub-id-type="pmcid">PMC1250325</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ananthakrishnan</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>VS</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Plenge</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>KP</given-names>
            </name>
          </person-group>
          <article-title>Improving case definition of Crohn's disease and ulcerative colitis in electronic medical records using natural language processing: a novel informatics approach</article-title>
          <source>Inflamm Bowel Dis</source>
          <year>2013</year>
          <month>06</month>
          <volume>19</volume>
          <issue>7</issue>
          <fpage>1411</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23567779"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MIB.0b013e31828133fd</pub-id>
          <pub-id pub-id-type="medline">23567779</pub-id>
          <pub-id pub-id-type="pmcid">PMC3665760</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raja</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hardin</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Text mining in healthcare. Applications and opportunities</article-title>
          <source>J Healthc Inf Manag</source>
          <year>2008</year>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>52</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="medline">19267032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ogren</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Mayo clinical text analysis and knowledge extraction system (cTAKES): architecture, component evaluation and applications</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>507</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20819853"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
          <pub-id pub-id-type="medline">20819853</pub-id>
          <pub-id pub-id-type="pii">17/5/507</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kocaballi</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Quiroz</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Rezazadegan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Willcock</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Laranjo</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A network model of activities in primary care consultations</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>10</month>
          <day>1</day>
          <volume>26</volume>
          <issue>10</issue>
          <fpage>1074</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31329875"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz046</pub-id>
          <pub-id pub-id-type="medline">31329875</pub-id>
          <pub-id pub-id-type="pii">5476188</pub-id>
          <pub-id pub-id-type="pmcid">PMC6748800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Petryszyn</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Witczak</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Costs in inflammatory bowel diseases</article-title>
          <source>Prz Gastroenterol</source>
          <year>2016</year>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27110304"/>
          </comment>
          <pub-id pub-id-type="doi">10.5114/pg.2016.57883</pub-id>
          <pub-id pub-id-type="medline">27110304</pub-id>
          <pub-id pub-id-type="pii">26941</pub-id>
          <pub-id pub-id-type="pmcid">PMC4814543</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Deen</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>van der Meulen-de Jong</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Parekh</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zand</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>DiNicola</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Inserra</surname>
              <given-names>EK</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Ha</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Esrailian</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>van Oijen</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Hommes</surname>
              <given-names>DW</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of an inflammatory bowel diseases monitoring index for use with mobile health technologies</article-title>
          <source>Clin Gastroenterol Hepatol</source>
          <year>2016</year>
          <month>12</month>
          <volume>14</volume>
          <issue>12</issue>
          <fpage>1742</fpage>
          <lpage>50.e7</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cgh.2015.10.035</pub-id>
          <pub-id pub-id-type="medline">26598228</pub-id>
          <pub-id pub-id-type="pii">S1542-3565(15)01546-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brownlee</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Machine Learning Mastery</source>
          <year>2017</year>
          <access-date>2019-10-17</access-date>
          <comment>A Gentle Introduction to the Bag-of-Words Model<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://machinelearningmastery.com/gentle-introduction-bag-words-model/">https://machinelearningmastery.com/gentle-introduction-bag-words-model/</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramesh</surname>
              <given-names>VC</given-names>
            </name>
          </person-group>
          <source>Chatbots Magazine</source>
          <year>2016</year>
          <access-date>2019-10-17</access-date>
          <comment>Unsupervised Deep Learning for Vertical Conversational Chatbots<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://chatbotsmagazine.com/unsupervised-deep-learning-for-vertical-conversational-chatbots-c66f21b1e0f">https://chatbotsmagazine.com/unsupervised-deep-learning-for-vertical-conversational-chatbots-c66f21b1e0f</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Extracting Chatbot Knowledge From Online Discussion Forums</article-title>
          <source>Proceedings of the 20th international joint conference on Artificial Intelligence</source>
          <year>2007</year>
          <conf-name>IJCAI'07</conf-name>
          <conf-date>January 6-12, 2007</conf-date>
          <conf-loc>Hyderabad, India</conf-loc>
          <fpage>423</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.5555/1625275.1625342</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shum</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>From Eliza to XiaoIce: challenges and opportunities with social chatbots</article-title>
          <source>Front Inf Technol Electron Eng</source>
          <year>2018</year>
          <month>01</month>
          <day>8</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>10</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.1631/fitee.1700826</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yufeng</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>Towards Data Science</source>
          <year>2017</year>
          <access-date>2019-10-17</access-date>
          <comment>The 7 Steps of Machine Learning<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://towardsdatascience.com/the-7-steps-of-machine-learning-2877d7e5548e">https://towardsdatascience.com/the-7-steps-of-machine-learning-2877d7e5548e</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shum</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>arXiv</source>
          <year>2019</year>
          <access-date>2019-10-17</access-date>
          <comment>The Design and Implementation of XiaoIce, an Empathetic Social Chatbot<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1812.08989">https://arxiv.org/abs/1812.08989</ext-link></comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
