<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v24i2e30397</article-id>
      <article-id pub-id-type="pmid">35142636</article-id>
      <article-id pub-id-type="doi">10.2196/30397</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Monitoring COVID-19 on Social Media: Development of an End-to-End Natural Language Processing Pipeline Using a Novel Triage and Diagnosis Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Trifan</surname>
            <given-names>Alina</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Doan</surname>
            <given-names>Son</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Hasan</surname>
            <given-names>Abul</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science and Information Systems</institution>
            <institution>Birkbeck, University of London</institution>
            <addr-line>Malet Street, Bloomsbury</addr-line>
            <addr-line>London, WC1E 7HX</addr-line>
            <country>United Kingdom</country>
            <phone>44 020 7631 8147</phone>
            <email>abulhasan@dcs.bbk.ac.uk</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8430-0451</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Levene</surname>
            <given-names>Mark</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8632-4732</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Weston</surname>
            <given-names>David</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9459-3430</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Fromson</surname>
            <given-names>Renate</given-names>
          </name>
          <degrees>MBBS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2609-3033</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Koslover</surname>
            <given-names>Nicolas</given-names>
          </name>
          <degrees>BMBCh</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7272-7258</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Levene</surname>
            <given-names>Tamara</given-names>
          </name>
          <degrees>BMBCh</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2443-2620</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science and Information Systems</institution>
        <institution>Birkbeck, University of London</institution>
        <addr-line>London</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Barnet General Hospital</institution>
        <addr-line>London</addr-line>
        <country>United Kingdom</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Abul Hasan <email>abulhasan@dcs.bbk.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>2</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>28</day>
        <month>2</month>
        <year>2022</year>
      </pub-date>
      <volume>24</volume>
      <issue>2</issue>
      <elocation-id>e30397</elocation-id>
      <history>
        <date date-type="received">
          <day>13</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>3</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>9</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>2</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Abul Hasan, Mark Levene, David Weston, Renate Fromson, Nicolas Koslover, Tamara Levene. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 28.02.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2022/2/e30397" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The COVID-19 pandemic has created a pressing need for integrating information from disparate sources in order to assist decision makers. Social media is important in this respect; however, to make sense of the textual information it provides and be able to automate the processing of large amounts of data, natural language processing methods are needed. Social media posts are often noisy, yet they may provide valuable insights regarding the severity and prevalence of the disease in the population. Here, we adopt a triage and diagnosis approach to analyzing social media posts using machine learning techniques for the purpose of disease detection and surveillance. We thus obtain useful prevalence and incidence statistics to identify disease symptoms and their severities, motivated by public health concerns.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop an end-to-end natural language processing pipeline for triage and diagnosis of COVID-19 from patient-authored social media posts in order to provide researchers and public health practitioners with additional information on the symptoms, severity, and prevalence of the disease rather than to provide an actionable decision at the individual level.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The text processing pipeline first extracted COVID-19 symptoms and related concepts, such as severity, duration, negations, and body parts, from patients’ posts using conditional random fields. An unsupervised rule-based algorithm was then applied to establish relations between concepts in the next step of the pipeline. The extracted concepts and relations were subsequently used to construct 2 different vector representations of each post. These vectors were separately applied to build support vector machine learning models to triage patients into 3 categories and diagnose them for COVID-19.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We reported macro- and microaveraged F<sub>1</sub> scores in the range of 71%-96% and 61%-87%, respectively, for the triage and diagnosis of COVID-19 when the models were trained on human-labeled data. Our experimental results indicated that similar performance can be achieved when the models are trained using predicted labels from concept extraction and rule-based classifiers, thus yielding end-to-end machine learning. In addition, we highlighted important features uncovered by our diagnostic machine learning models and compared them with the most frequent symptoms revealed in another COVID-19 data set. In particular, we found that the most important features are not always the most frequent ones.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our preliminary results show that it is possible to automatically triage and diagnose patients for COVID-19 from social media natural language narratives, using a machine learning pipeline in order to provide information on the severity and prevalence of the disease for use within health surveillance systems.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>conditional random fields</kwd>
        <kwd>disease detection and surveillance</kwd>
        <kwd>medical social media</kwd>
        <kwd>natural language processing</kwd>
        <kwd>severity and prevalence</kwd>
        <kwd>support vector machines</kwd>
        <kwd>triage and diagnosis</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Overview</title>
        <p>During the ongoing coronavirus pandemic, hospitals have been continuously at risk of being overwhelmed by the number of people developing serious illness. People in the United Kingdom were advised to stay at home if they had coronavirus symptoms and to seek assistance through the National Health Service (NHS) helpline if they needed to [<xref ref-type="bibr" rid="ref1">1</xref>]. Consequently, there is an urgent need to develop novel, practical approaches to assist medical staff. A variety of methods have been recently developed that involve <italic>natural language processing</italic> (NLP) techniques; the concerns of these methods range from the level of the individual (see, for example, [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]) up to the population level [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>Herein, we take a diagnostic approach and propose an end-to-end NLP pipeline to automatically triage and diagnose COVID-19 cases from patient-authored medical social media posts. The triage may inform decision makers about the severity of COVID-19, and diagnosis could help in gauging the prevalence of infections in the population. Attempting a clinical diagnosis of influenza, or in our case a diagnosis of COVID-19, purely based on the information provided in a social media post is unlikely to be sufficiently accurate to be actionable at an individual level, since the quality of this information will be typically noisy and incomplete. However, it is not necessary to have actionable diagnoses at the individual level in order to identify interesting patterns at the population level, which may be useful within public health surveillance systems. For example, text messages from the microblogging site Twitter were used to identify influenza outbreaks [<xref ref-type="bibr" rid="ref6">6</xref>]. In addition, Twitter data in conjunction with a US Centers for Disease Control and Prevention (CDC) data set were used to predict the percentage of influenza-like illness in the US population [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>One of our key concerns is in the production of a high-quality human-labeled data set on which to build our pipeline. Here, we give a brief overview of our pipeline and how we developed our data set. The first step in the pipeline was attained by developing an annotation application that detects and highlights COVID-19-related symptoms with their severity and duration in a social media post, henceforth collectively termed as <italic>concepts</italic>. During the second step, relations between symptoms and other relevant concepts were also automatically identified and annotated. For example, <italic>breathing hurts</italic> is a symptom, which is related to a body part, the <italic>upper chest area</italic>.</p>
        <p>One author manually annotated our data with concepts and relations, allowing us to present posts highlighted with identified concepts and relations to 3 experts, along with several questions, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The first question asked the experts to triage a patient into 1 of the following 3 categories: <italic>Stay at home</italic>, <italic>Send to a GP</italic> (where GP stands for general physician), or <italic>Send to a hospital</italic>. The second question asked them to diagnose the likelihood of COVID-19 on a Likert scale of 1-5 [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>The 3 experts are junior doctors working in the United Kingdom who were redeployed to work on COVID-19 wards during the first wave of the pandemic, between March and July 2020. Their roles involved the diagnosis and management of patients with COVID-19, including patients who were particularly unwell and required either noninvasive or invasive ventilation. There were some training sessions organized for doctors working in COVID-19 wards. However, these were only provided toward the end of the first wave, as there was initially little knowledge of the virus and how to treat it. In the hospital, the doctors followed local protocols, which were adjusted as more experience was gained about the virus.</p>
        <p>We also asked the doctors to indicate whether the highlighted text presented is sufficient in reaching their decision in order to understand its usefulness when we incorporate it in the annotation interface. The annotations were found to be sufficient in as many as 85% of the posts, on average, as indicated by the doctors’ answers to question 3 in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <p>The posts labeled by the doctors were then used to construct 2 types of predictive machine learning model using <italic>support vector machines</italic> (SVMs) [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]; see the Step 4: Triage and Diagnosis subsection in the Methods section. The <italic>triage models</italic> use hierarchical binary classifiers, which consider the risk averseness or tolerance of the doctors when making the diagnosis [<xref ref-type="bibr" rid="ref11">11</xref>]. The <italic>diagnostic models</italic> first calculate the probability of a patient having COVID-19 from doctors’ ratings. The probabilities are then used to construct 3 different decision functions for classifying <italic>COVID</italic> and <italic>NO_COVID</italic> classes; these are detailed in the Problem Setting subsection in the Methods section.</p>
        <p>We trained the SVM models in 2 different ways: first with ground-truth annotations and second using predictions from the concept and relation extraction step described before. Predictions obtained from the concept extraction step make use of <italic>conditional random fields</italic> (CRFs) [<xref ref-type="bibr" rid="ref12">12</xref>]; see the Step 1: Concept Extraction subsection in the Methods section for implementation details. Relations are obtained from these predicted concepts using an unsupervised <italic>rule-based</italic> (RB) classifier [<xref ref-type="bibr" rid="ref13">13</xref>]; see the Step 2: Relation Extraction subsection in the Methods section.</p>
        <p>We also discussed the feature importance obtained from the constructed COVID-19 diagnostic models and compared it with the most frequent symptoms from Sarker et al [<xref ref-type="bibr" rid="ref4">4</xref>] and our data set. We found that symptoms such as anosmia/ageusia (loss of smell/taste) rank in the top 5 most important features, whereas they do not rank in the top 5 most frequent symptoms; see the Discussion section. Overall, we made several contributions as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>We showed that it is possible to take an approach that aims at disease detection to augment public health surveillance systems, by constructing machine learning models to triage and diagnose COVID-19 from patients' natural language narratives. To the best of our knowledge, no other previous work has attempted to triage or diagnose COVID-19 from social media posts.</p>
          </list-item>
          <list-item>
            <p>We also built an end-to-end NLP pipeline by making use of automated concept and relation extraction. Our experiments showed that the models built using predictions from concept and relation extraction produce similar results to those built using ground-truth human concept annotation.</p>
          </list-item>
        </list>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>A patient-authored social media post is annotated with symptoms (light green), affected body parts (pale blue), duration (light yellow), and severities (pink). The phrases in square brackets show relations between a symptom and a body part/duration/severity when the distance is greater than 1. This annotated post was presented to 3 doctors to triage and diagnose the author of the post by answering questions 1 and 2, respectively. GP: general physician.</p>
          </caption>
          <graphic xlink:href="jmir_v24i2e30397_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>Data derived from social media have been successfully used to facilitate the detection of influenza epidemics [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. In addition, Edo-Osagie et al [<xref ref-type="bibr" rid="ref14">14</xref>] provide a thorough review of the use of Twitter in public health surveillance for the purpose of monitoring, detecting, and forecasting influenza-like illnesses. Since the start of the COVID-19 pandemic, a number of mobile application–based, self-reported symptom tools have emerged to track novel symptoms [<xref ref-type="bibr" rid="ref15">15</xref>]. The mobile application in Menni et al [<xref ref-type="bibr" rid="ref16">16</xref>] applied logistic regression (LR) to predict the percentage of probable infected cases among the total application users in the United States and United Kingdom combined. Mizrahi et al [<xref ref-type="bibr" rid="ref17">17</xref>] performed a statistical analysis on primary care electronic health record (EHR) data to find longitudinal dynamics of symptoms prior to and throughout the infection.</p>
        <p>At an individual diagnostic level, Zimmerman et al [<xref ref-type="bibr" rid="ref18">18</xref>] applied classification and regression trees to determine the likelihood of symptom severity of influenza in clinical settings. Moreover, machine learning algorithms, such as decision trees, have shown promising results in detecting COVID-19 from blood test analyses [<xref ref-type="bibr" rid="ref19">19</xref>]. Here, we focus on features extracted from a textual source to triage and diagnose COVID-19 for the purpose of providing population-level statistics in the context of public health surveillance. Studies related to our work deploy features obtained from online portals, telehealth visits, and structured and unstructured patient/doctor notes from EHRs. In general, COVID-19 clinical prediction models can broadly be categorized into risk, diagnosis, and prognosis models [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <p>In Judson et al [<xref ref-type="bibr" rid="ref21">21</xref>], a portal-based COVID-19 self-triage and self-scheduling tool was used to segment patients into 4 risk categories: emergent, urgent, nonurgent, and self-care, whereas the online telemedicine system in Liu et al [<xref ref-type="bibr" rid="ref22">22</xref>] used LR to predict low-, moderate-, and high-risk patients by utilizing demographic information, clinical symptoms, blood tests, and computed tomography (CT) scan results.</p>
        <p>In Schwab et al [<xref ref-type="bibr" rid="ref3">3</xref>], various machine learning models were developed to predict patient outcomes from clinical, laboratory, and demographic features found in EHRs [<xref ref-type="bibr" rid="ref23">23</xref>]. The authors reported that gradient boosting (XGB), random forests, and SVMs are the best-performing models for predicting COVID-19 test results, hospital admissions, and intensive care unit admissions for positive patients, respectively. A detailed list of clinical and laboratory features can be found in Wang et al [<xref ref-type="bibr" rid="ref24">24</xref>], where the authors developed predictive models for the inpatient mortality in Wuhan using an ensemble of XGB models. Similarly, in Vaid et al [<xref ref-type="bibr" rid="ref25">25</xref>], mortality and critical events for patients were predicted using XGB classifiers. Finally, a critical review on various diagnostic and prognostic models of COVID-19 used in clinical settings can be found in Wynants et al [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <p>In Wagner et al [<xref ref-type="bibr" rid="ref26">26</xref>], COVID-19 symptoms from unstructured clinical notes in the EHRs of patients subjected to COVID-19 polymerase chain reaction (PCR) testing were extracted. In addition, COVID-19 SignSym [<xref ref-type="bibr" rid="ref27">27</xref>] was designed to automatically extract symptoms and related attributes from free text. Furthermore, the study by López-Úbeda et al [<xref ref-type="bibr" rid="ref28">28</xref>] utilized radiological text reports from lung CT scans to diagnose COVID-19. Similar to our approach, López-Úbeda et al [<xref ref-type="bibr" rid="ref28">28</xref>] first extracted concepts using a popular medical ontology [<xref ref-type="bibr" rid="ref29">29</xref>] and then constructed a document representation using word embeddings [<xref ref-type="bibr" rid="ref30">30</xref>] and concept vectors [<xref ref-type="bibr" rid="ref28">28</xref>]. However, our methodology differs from theirs with respect to the extraction of relations between concepts, and moreover, our data set, comprising posts obtained from medical social media, is more challenging to work with, since social media posts exhibit greater heterogeneity in language than radiological text reports.</p>
        <p>Finally, Sarker et al [<xref ref-type="bibr" rid="ref4">4</xref>] published a COVID-19 symptom lexicon extracted from Twitter, which we compared our work to in the Discussion section.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data</title>
        <p>We collected social media posts discussing COVID-19 medical conditions from a forum called <italic>Patient</italic> [<xref ref-type="bibr" rid="ref31">31</xref>]. This is a public forum that was created at the onset of the coronavirus outbreak in the United Kingdom. We obtained permission from the site administrator to scrape publicly available posts dated between April and June 2020. In addition, all user IDs and metadata were removed from the posts for the purpose of the study. After the posts were anonymized, and duplicates were removed, we randomly selected 500 distinct posts. The first author annotated these posts with the classes shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. The class labels represent symptoms and the related concepts: (1) duration; (2) intensifier, which increases the level of symptom severity; (3) severity; (4) negation, which denotes the presence or absence of the symptom or severity; and (5) affected body parts. We also annotated relations between a symptom and other concepts that exist at the sentence level. For example, the relation between a symptom and a severity concept is denoted as <italic>(SYM, SEVERITY)</italic>. The posts were then marked with concepts in different colors, and the relations were placed right after the symptom in square brackets, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Each marked post was presented to the doctors using a web application, and they were asked 3 questions independently; see <xref rid="figure1" ref-type="fig">Figure 1</xref>. We referred to the doctors’ answers to questions 1 and 2 as the COVID-19 symptom triage and diagnosis, respectively. Thus, for each post, we had 3 independent answers from 3 doctors, which we denoted as A, B, and C, respectively; these corresponded to the last 3 authors of the paper and were assigned randomly.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The frequency distribution of annotated classes/concepts from the text is shown. We have also shown the percentage of each class after discounting the OTHER labels. The average number of tokens per post was 130.17 (SD 97.83). BPOC: body part, organ, or organ component; SYM: symptoms.</p>
          </caption>
          <graphic xlink:href="jmir_v24i2e30397_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Measurement of Agreement</title>
          <p>To measure the agreement between the answers (recommendations and ratings) of the 3 doctors to questions 1 and 2 of <xref rid="figure1" ref-type="fig">Figure 1</xref>, we first calculated the proportion of observed agreement (<italic>ρ</italic><sub>o</sub>), as suggested by de Vet et al [<xref ref-type="bibr" rid="ref32">32</xref>], who stipulated that Cohen <italic>κ</italic> is actually a measure of reliability rather than agreement; we observed that <italic>ρ</italic><sub>o</sub> was high in all cases, as can be seen in <xref ref-type="table" rid="table1">Table 1</xref>. We noted that the paradoxical behavior of Cohen <italic>κ</italic> can arise when the absolute agreement (<italic>ρ</italic><sub>o</sub>) is high [<xref ref-type="bibr" rid="ref33">33</xref>]. This may occur when there is a substantial imbalance in the marginal totals of the answers, which we observed in the answers to question 1. Consequently, in addition to Cohen <italic>κ</italic>, we deployed a common solution to this problem, called the AC1 statistic devised by Gwet and coworkers [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>].</p>
          <p>We found that for question 1, the AC1 measure showed moderate agreement (in the middle of the moderate range) between A and B (0.55) and substantial agreement between A and C (0.72); see Landis and Koch [<xref ref-type="bibr" rid="ref36">36</xref>] for the benchmark scale for the strength of agreement. For question 2, it turned out that said paradox did not occur, resulting in similar values for <italic>κ</italic> and AC1. The agreement between A and B (<italic>κ</italic>=0.64, AC1=0.67) and between B and C (<italic>κ</italic>=0.64, AC1=0.67) was substantial, while the agreement between A and C (<italic>κ</italic>=0.40, AC1=0.40) was on the boundary of fair and moderate; see <xref ref-type="table" rid="table1">Table 1</xref>.</p>
          <p>It is important to note that COVID-19 is a novel virus disease, for which the doctors did not have prior experience or training before the first wave of the pandemic, and thus one would expect some difference of opinion. (We bear in mind that in our setting, the doctors can only see the posts and thus cannot interact with the patients as they would in a normal scenario.) Moreover, there are probable differences in risk tolerances between the doctors, which would lead to potentially different decisions and diagnoses.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Pairwise agreement between pairs of doctors’ answers to questions 1 and 2; see <xref rid="figure1" ref-type="fig">Figure 1</xref> for an example.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="140"/>
              <col width="150"/>
              <col width="140"/>
              <col width="140"/>
              <col width="150"/>
              <col width="140"/>
              <col width="140"/>
              <thead>
                <tr valign="top">
                  <td>Pair</td>
                  <td colspan="3">Question 1</td>
                  <td colspan="3">Question 2</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>
                    <italic>ρ</italic>
                    <sub>o</sub>
                  </td>
                  <td>
                    <italic>κ</italic>
                  </td>
                  <td>AC1</td>
                  <td>
                    <italic>ρ</italic>
                    <sub>o</sub>
                  </td>
                  <td>
                    <italic>κ</italic>
                  </td>
                  <td>AC1</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>AB</td>
                  <td>0.65</td>
                  <td>0.26</td>
                  <td>0.55</td>
                  <td>0.73</td>
                  <td>0.64</td>
                  <td>0.67</td>
                </tr>
                <tr valign="top">
                  <td>BC</td>
                  <td>0.63</td>
                  <td>0.14</td>
                  <td>0.53</td>
                  <td>0.73</td>
                  <td>0.64</td>
                  <td>0.67</td>
                </tr>
                <tr valign="top">
                  <td>AC</td>
                  <td>0.77</td>
                  <td>0.28</td>
                  <td>0.72</td>
                  <td>0.51</td>
                  <td>0.40</td>
                  <td>0.40</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Problem Setting</title>
        <sec>
          <title>Triage Classification for Question 1</title>
          <p>We mapped the doctors’ recommendations from question 1 to ordinal values; the options <italic>Stay at home</italic>, <italic>Send to a GP</italic>, or <italic>Send to a hospital</italic> were transformed to the values 1, 2, and 3, respectively. To combine recommendations from 2 or more doctors, we first took their average. This result was rounded to an integer in 1 of 2 ways: either by taking the floor or by taking the ceiling. Considering the risk attitude prevalent among medical practitioners [<xref ref-type="bibr" rid="ref11">11</xref>], we categorized the ceiling of the average to be <italic>risk averse</italic>, denoted by, for example, AB(R-a), and the floor to be <italic>risk tolerant</italic>, denoted by, for example, AB(R-t). Thus, for each patient’s post, we had in total 11 recommendations from 3 doctors for question 1. We constructed a hierarchical classification model for each of these recommendations, where the goal was to classify a post into 1 of the 3 options.</p>
        </sec>
        <sec>
          <title>Diagnosis Classification for Question 2</title>
          <p>To diagnose whether a patient has COVID-19 from their post, we first estimated the probability of having the disease by normalizing the rating; that is, given a rating, r, the probability of COVID-19, <italic>P</italic>r(COVID&#124;r), which we termed the <italic>ground-truth probability</italic> (GTP), was simply <italic>P</italic>r(COVID&#124;r) = (r – 1)/4.</p>
          <p>Given our GTP estimates were discrete, we investigated 3 decision boundaries, denoted by LE, LT, and NEQ, based on a threshold value of 0.5 to classify a post as follows:</p>
          <list list-type="bullet">
            <list-item>
              <p>LE: If Pr(COVID&#124;r)≤0.5, then NO_COVID, else COVID.</p>
            </list-item>
            <list-item>
              <p>LT: If Pr(COVID&#124;r)&#60;0.5, then NO_COVID, else COVID.</p>
            </list-item>
            <list-item>
              <p>NEQ: If Pr(COVID&#124;r)&#60;0.5, then NO_COVID, elseif Pr(COVID&#124;r)&#62;0.5, then COVID.</p>
            </list-item>
          </list>
          <p>Note that NEQ ignores cases on the 0.5 boundary.</p>
        </sec>
      </sec>
      <sec>
        <title>Methodology</title>
        <p>A schematic of our methodology to triage and diagnose patients based on their social posts is shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. Here, the circles denote the steps followed in the pipeline. We now detail each of these steps.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>A block diagram of the COVID-19 triage-and-diagnosis text processing pipeline. CRF: conditional random field; RB: rule based; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="jmir_v24i2e30397_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Step 1: Concept Extraction</title>
          <p>In the first step, we preprocessed each patient’s post by splitting it into sentences and tokens using General Architecture for Text Engineering (GATE) software’s (University of Sheffield) [<xref ref-type="bibr" rid="ref37">37</xref>] built-in NLP pipeline. For each token in a sentence, we built discrete features that signal whether the token is a member of 1 of the following dictionaries: (1) Symptom, (2) Severity, (3) Duration, (4) Intensifier, and (5) Negation. The dictionaries were built by analyzing the posts while annotating them. We also utilized the MetaMap system [<xref ref-type="bibr" rid="ref29">29</xref>], assuming that it contains all the necessary technical terms, to map tokens to 3 useful semantic categories: <italic>Sign or Symptom</italic>; <italic>Disease or Syndrome</italic>; and <italic>Body Part, Organ, or Organ Component</italic>. Due to the assumption regarding medical terms, the system does not expect any new additional terms, and thus we were justified in extracting concepts and relations in preprocessing steps. The preprocessed text was then used to build a concept extraction module to recognize the classes, shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, by applying a CRF [<xref ref-type="bibr" rid="ref12">12</xref>]. A detailed description of our CRF training methodology can be found in Hasan et al [<xref ref-type="bibr" rid="ref38">38</xref>]. The extracted concepts were then used for our next step to recognize the relations between concepts.</p>
        </sec>
        <sec>
          <title>Step 2: Relation Extraction</title>
          <p>The semantic relation between a symptom and other concepts, which we formally termed <italic>modifiers</italic>, was resolved using an unsupervised RB classifier algorithm. We first filtered all symptom and modifier pairs from a sentence within a predefined distance and then selected the closest modifier to a symptom to construct a relation. In total, we extracted 5 kinds of relations as follows: <italic>(SYM, SEVERITY)</italic>, <italic>(SYM, DURATION)</italic>, <italic>(SYM, BPOC)</italic>, <italic>(SYM, NEGATION)</italic>, and <italic>(SYM, ?)</italic>—here, SYM and BPOC refer to symptoms, and body part, organ, or organ component, respectively.</p>
          <p>The severity modifiers were mapped to a scale of 1-5; the semantic meaning of the scale was <italic>very mild</italic>, <italic>mild</italic>, <italic>moderate</italic>, <italic>severe</italic>, and <italic>very severe</italic>, respectively. The duration modifiers were also mapped to real values in chunks of weeks. So, for example, <italic>10 days</italic> was mapped to the value <italic>1.43</italic>.</p>
        </sec>
        <sec>
          <title>Step 3: Vector Representation</title>
          <p>Fixed-length vector representations suitable as input for SVM classifiers were built as follows:</p>
          <list list-type="bullet">
            <list-item>
              <p><italic>Symptom-only</italic> vector representation: Let &#60;S<sub>0</sub>, S<sub>1</sub>, . . . , S<sub>n</sub>&#62; be a vector of symptoms constructed from the symptom vocabulary; for our data set, the number of unique symptom words/phrases was n=871. To construct the vector representation for a post, we extracted the concept, <italic>SYM</italic>, and the relation (<italic>SYM</italic>, <italic>NEGATION</italic>) and set S<sub>i</sub> to 1, 0, or –1 according to whether the symptom was present, not present, or negated, respectively.</p>
            </list-item>
            <list-item>
              <p><italic>Symptom-modifier relation vector</italic> representation: The symptom-modifier relation vector is a much larger vector than the symptom-only vector and comprises 3 appended vectors containing (1) the absence or presence of 110 unique body parts, (2) the absence or value of a symptom duration, and (3) the absence, negation, or value of a symptom severity.</p>
            </list-item>
          </list>
        </sec>
        <sec>
          <title>Step 4: Triage and Diagnosis</title>
          <p>We utilized SVM classification and regression models to triage and diagnose patients’ posts, respectively, from the vector representations described earlier. For question 1, the recommendation from a doctor or a combination of doctors was the class label of the post; see the Problem Setting subsection in the Methods section for a description. To build a binary classifier, we first combined the <italic>Send to a GP</italic> and <italic>Send to a hospital</italic> recommendations to represent a single class, <italic>Send</italic>. The SVM was trained to distinguish between the <italic>Stay at home</italic> and the <italic>Send</italic> options; we called this <italic>SVM classifier 1</italic>. Next, the posts labeled as <italic>Stay at home</italic> were discarded and <italic>SVM classifier 2</italic> was built utilizing the remaining posts to classify the <italic>Send to a GP</italic> and <italic>Send to a hospital</italic> recommendations. This resulted in a hierarchical classifier for COVID-19 triage.</p>
          <p>For diagnosing COVID-19 cases, we deployed a variant of the SVM, called <italic>support vector regression</italic> (SVR) [<xref ref-type="bibr" rid="ref9">9</xref>], to estimate the probability of COVID-19. We used the GTP that was derived from answers to question 2 as the dependent variable. SVR takes as input a high-dimensional feature vector, such as a symptom-only or a symptom-modifier relation vector representation, as described earlier. Classification was performed using the 3 decision functions, LE, LT, and NEQ, described previously.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Evaluation</title>
        <p>We evaluated the performance of the CRF and SVM classification algorithms using the standard measures of precision, recall, and macro- and microaveraged F<sub>1</sub> scores [<xref ref-type="bibr" rid="ref39">39</xref>]. Macroaveraged scores were computed by considering the score independently for each class and then taking the average, while microaveraged scores were computed by considering all the classes together. As our data set was not balanced with <italic>COVID</italic> and <italic>NO_COVID</italic> classes, as can be seen in <xref rid="figure4" ref-type="fig">Figure 4</xref>, and we wished to give equal weight to all instances, we reported microaveraged scores for the SVR classification. In contrast, in the case of concept extraction, the <italic>Other</italic> class dominated. So, in this case, we reported the macroaveraged scores for the CRF classification results.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Support ratio of triage classes across models for question 1 classification tasks. Absolute numbers for the "Send to a hospital" class in test sets were as follows: A=10, B=12, AB(R-a)=14, AB(R-t)=5, BC(R-a)=6, AC(R-a)=5, and ABC(R-a)=9; the value for the remaining models was 0. GP: general physician.</p>
          </caption>
          <graphic xlink:href="jmir_v24i2e30397_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Experimental Setup</title>
        <p>For the CRF, we reported 3-fold cross-validated macroaveraged results. Specifically, we trained each fold by a Python wrapper [<xref ref-type="bibr" rid="ref40">40</xref>] for CRFsuite; see Okazaki [<xref ref-type="bibr" rid="ref41">41</xref>]. For relation extraction, we ran our unsupervised RB algorithm on the 500 posts and calculated the F<sub>1</sub> scores by varying distances considering the 2 cases with and without stop words.</p>
        <p>We constructed SVM binary classifiers, SVM classifier 1 and SVM classifier 2, using the Python wrapper for LIBSVM [<xref ref-type="bibr" rid="ref42">42</xref>] implemented in Sklearn [<xref ref-type="bibr" rid="ref43">43</xref>] with both linear and Gaussian <italic>radial basis function</italic> (RBF) kernels [<xref ref-type="bibr" rid="ref10">10</xref>]. Similarly, SVR [<xref ref-type="bibr" rid="ref44">44</xref>] was implemented using LIBSVM and was built with both linear and RBF kernels. The hyperparameters (C=10 for the penalty, γ=0.01 for the RBF kernel, and ε=0.5 for the threshold) were discovered using a grid search [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
        <p>We simulated 2 cases for COVID-19 triage and diagnosis. First, SVM and SVR models were trained with the ground truth to examine the predictive performance when they are deployed as stand-alone applications. Second, when trained with the predictions from the CRF and RB classifier, they resembled an end-to-end NLP application. To obtain a comparable result, the models were always tested with the ground truth. As a measure of performance, we reported macro- and microaveraged F<sub>1</sub> scores for SVM classifiers and SVR, respectively.</p>
      </sec>
      <sec>
        <title>Evaluation Outcomes</title>
        <p>The concept and relation extraction phases produced excellent and good predictive performances, respectively; see <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref>. The triage classification results from question 1 are shown in <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>; the full enumeration can be seen in the first column. When we trained the models with the symptom-modifier vector representations from the ground truth, the results of SVM classifier 1 and SVM classifier 2 were in the range of 72%-93% and 83%-96%, respectively. The symptom-only vector representations produced results in the range of 71%-94% and 79%-95%. These results suggested that we can achieve good predictive performance for classifying <italic>Stay at home</italic> and <italic>Send</italic> and for <italic>Send to a GP</italic> and <italic>Send to a hospital</italic>. In general, risk-tolerant models achieved better performance than risk-averse models. However, since in the test set, posts with the label <italic>Send to a hospital</italic> were missing for some models (as can be seen from <xref rid="figure4" ref-type="fig">Figure 4</xref>), we could not report them. We reported macroaveraged F<sub>1</sub> score results since question 1 was framed as a decision problem, where weights for the classes are a priori equal. The results obtained after training with CRF predictions were in similar ranges for both representations and classifiers. This is important, because it indicated that an end-to-end NLP application is likely to produce similar predictive performance.</p>
        <p>Regarding question 2, when we trained the models with the symptom-modifier vector representation from the ground truth, the results of COVID-19 diagnosis were in the range of 72%-87%, 61%-76%, and 74%-87% for the LE, LT, and NEQ decision functions, respectively; see <xref ref-type="table" rid="table6">Table 6</xref>. The symptom-only vector representation produced results in the range of 70%-88%, 59%-79%, and 74%-87% for the LE, LT, and NEQ decision functions, respectively.</p>
        <p>In general, NEQ models perform better due to the omission of borderline cases where the GTPs are exactly 0.5. The support ratios for each model for different decision functions are shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. When we trained the models with the symptom-modifier vector representation from the CRF predictions, the results were in the range of 68%-86%, 64%-76%, and 73%-87% for the LE, LT, and NEQ decision functions, respectively. This indicated that for diagnosis as well as triage, an end-to-end NLP application is likely to perform similarly to stand-alone applications. Here, we reported microaveraged F<sub>1</sub> scores since, in our data set, <italic>NO_COVID</italic> cases dominated; this largely resembled the natural distribution in the population, where people who tested positive for coronavirus are a relatively low percentage in the whole population, even when the prevalence of the virus is high.</p>
        <p>Finally, we trained our models using a linear kernel but found that the RBF dominates in most of the cases; however, linear kernels are useful in finding feature importance [<xref ref-type="bibr" rid="ref45">45</xref>].</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Concept extraction using CRF<sup>a</sup> on 3-fold cross-validation.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Label</td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
                <td>Support</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>SYM<sup>b</sup></td>
                <td>0.94</td>
                <td>0.97</td>
                <td>0.95</td>
                <td>1300</td>
              </tr>
              <tr valign="top">
                <td>SEVERITY</td>
                <td>0.80</td>
                <td>0.79</td>
                <td>0.79</td>
                <td>437</td>
              </tr>
              <tr valign="top">
                <td>BPOC<sup>c</sup></td>
                <td>0.92</td>
                <td>0.83</td>
                <td>0.87</td>
                <td>356</td>
              </tr>
              <tr valign="top">
                <td>DURATION</td>
                <td>0.87</td>
                <td>0.91</td>
                <td>0.89</td>
                <td>667</td>
              </tr>
              <tr valign="top">
                <td>INTENSIFIER</td>
                <td>0.88</td>
                <td>0.97</td>
                <td>0.92</td>
                <td>494</td>
              </tr>
              <tr valign="top">
                <td>NEGATION</td>
                <td>0.83</td>
                <td>0.89</td>
                <td>0.86</td>
                <td>338</td>
              </tr>
              <tr valign="top">
                <td>OTHER</td>
                <td>0.99</td>
                <td>0.98</td>
                <td>0.98</td>
                <td>16892</td>
              </tr>
              <tr valign="top">
                <td>Macroaverage</td>
                <td>0.89</td>
                <td>0.89</td>
                <td>0.89</td>
                <td>—<sup>d</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>CRF: conditional random field.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>SYM: symptoms.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>BPOC: body part, organ, or organ component.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Relation extraction using RB<sup>a</sup> classifier results on 3-fold cross-validation.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="150"/>
            <col width="140"/>
            <col width="140"/>
            <col width="0"/>
            <col width="150"/>
            <col width="140"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Distance</td>
                <td colspan="4">With stop words</td>
                <td colspan="3">Without stop words</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>2</td>
                <td>0.74</td>
                <td>0.63</td>
                <td>0.68</td>
                <td colspan="2">0.74</td>
                <td>0.64</td>
                <td>0.69</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>0.75</td>
                <td>0.67</td>
                <td>0.71</td>
                <td colspan="2">0.75</td>
                <td>0.67</td>
                <td>0.71</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>0.75</td>
                <td>0.69</td>
                <td>0.72</td>
                <td colspan="2">0.75</td>
                <td>0.69</td>
                <td>0.72</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>0.75</td>
                <td>0.71</td>
                <td>0.73</td>
                <td colspan="2">0.74</td>
                <td>0.71</td>
                <td>0.73</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>0.74</td>
                <td>0.72</td>
                <td>0.73</td>
                <td colspan="2">0.74</td>
                <td>0.72</td>
                <td>0.73</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>0.73</td>
                <td>0.73</td>
                <td>0.73</td>
                <td colspan="2">0.73</td>
                <td>0.73</td>
                <td>0.73</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>RB: rule based.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Question 1: hierarchical classification results for the RBF<sup>a</sup> kernel using the symptom-modifier relation vector.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="140"/>
            <col width="0"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="0"/>
            <col width="140"/>
            <col width="140"/>
            <col width="130"/>
            <col width="0"/>
            <col width="0"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Model</td>
                <td colspan="4">SVM<sup>b</sup> classifier 1</td>
                <td colspan="4">SVM classifier 2</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="12">
                  <bold>Trained on the ground truth</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>A</td>
                <td colspan="2">0.82</td>
                <td>0.91</td>
                <td>0.86</td>
                <td colspan="2">0.73</td>
                <td>0.95</td>
                <td>0.83</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>B</td>
                <td colspan="2">0.73</td>
                <td>0.77</td>
                <td>0.75</td>
                <td colspan="2">0.81</td>
                <td>0.99</td>
                <td>0.89</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>C</td>
                <td colspan="2">0.85</td>
                <td>0.98</td>
                <td>0.91</td>
                <td colspan="2">—<sup>c</sup></td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB(R-a)</td>
                <td colspan="2">0.70</td>
                <td>0.75</td>
                <td>0.72</td>
                <td colspan="2">0.80</td>
                <td>0.96</td>
                <td>0.88</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB(R-t)</td>
                <td colspan="2">0.84</td>
                <td>0.96</td>
                <td>0.89</td>
                <td colspan="2">0.85</td>
                <td>1.00</td>
                <td>0.92</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC(R-a)</td>
                <td colspan="2">0.72</td>
                <td>0.75</td>
                <td>0.73</td>
                <td colspan="2">0.92</td>
                <td>1.00</td>
                <td>0.96</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC(R-t)</td>
                <td colspan="2">0.86</td>
                <td>0.99</td>
                <td>0.92</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC(R-a)</td>
                <td colspan="2">0.79</td>
                <td>0.87</td>
                <td>0.83</td>
                <td colspan="2">0.89</td>
                <td>1.00</td>
                <td>0.94</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC(R-t)</td>
                <td colspan="2">0.88</td>
                <td>0.98</td>
                <td>0.93</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC(R-a)</td>
                <td colspan="2">0.70</td>
                <td>0.76</td>
                <td>0.73</td>
                <td colspan="2">0.89</td>
                <td>0.99</td>
                <td>0.93</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC(R-t)</td>
                <td colspan="2">0.88</td>
                <td>0.99</td>
                <td>0.93</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>Trained on the CRF<sup>d</sup> predictions</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>A</td>
                <td colspan="2">0.81</td>
                <td>0.89</td>
                <td>0.85</td>
                <td colspan="2">0.72</td>
                <td>0.91</td>
                <td>0.80</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>B</td>
                <td colspan="2">0.74</td>
                <td>0.74</td>
                <td>0.74</td>
                <td colspan="2">0.81</td>
                <td>0.99</td>
                <td>0.89</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>C</td>
                <td colspan="2">0.85</td>
                <td>0.96</td>
                <td>0.90</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB(R-a)</td>
                <td colspan="2">0.73</td>
                <td>0.71</td>
                <td>0.71</td>
                <td colspan="2">0.81</td>
                <td>0.96</td>
                <td>0.88</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB(R-t)</td>
                <td colspan="2">0.84</td>
                <td>0.94</td>
                <td>0.88</td>
                <td colspan="2">0.84</td>
                <td>1.00</td>
                <td>0.92</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC(R-a)</td>
                <td colspan="2">0.74</td>
                <td>0.71</td>
                <td>0.72</td>
                <td colspan="2">0.92</td>
                <td>1.00</td>
                <td>0.96</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC(R-t)</td>
                <td colspan="2">0.88</td>
                <td>0.98</td>
                <td>0.93</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC(R-a)</td>
                <td colspan="2">0.81</td>
                <td>0.85</td>
                <td>0.83</td>
                <td colspan="2">0.89</td>
                <td>1.00</td>
                <td>0.94</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC(R-t)</td>
                <td colspan="2">0.88</td>
                <td>0.98</td>
                <td>0.93</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC(R-a)</td>
                <td colspan="2">0.72</td>
                <td>0.72</td>
                <td>0.72</td>
                <td colspan="2">0.89</td>
                <td>1.00</td>
                <td>0.94</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC(R-t)</td>
                <td colspan="2">0.89</td>
                <td>0.98</td>
                <td>0.93</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>RBF: radial basis function.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>Not applicable.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>CRF: conditional random field.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Question 1: hierarchical classification results for the RBF<sup>a</sup> kernel using the symptom-only vector.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="0"/>
            <col width="140"/>
            <col width="140"/>
            <col width="130"/>
            <col width="0"/>
            <col width="0"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Model</td>
                <td colspan="4">SVM<sup>b</sup> classifier 1</td>
                <td colspan="4">SVM classifier 2</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">Precision</td>
                <td>Recall</td>
                <td>F<sub>1</sub> score</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="11">
                  <bold>Trained on the ground truth</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>A</td>
                <td>0.83</td>
                <td>0.91</td>
                <td>0.87</td>
                <td colspan="2">0.74</td>
                <td>0.85</td>
                <td>0.79</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>B</td>
                <td>0.71</td>
                <td>0.81</td>
                <td>0.76</td>
                <td colspan="2">0.81</td>
                <td>0.98</td>
                <td>0.89</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>C</td>
                <td>0.87</td>
                <td>0.97</td>
                <td>0.92</td>
                <td colspan="2">—<sup>c</sup></td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB(R-a)</td>
                <td>0.69</td>
                <td>0.75</td>
                <td>0.72</td>
                <td colspan="2">0.83</td>
                <td>0.96</td>
                <td>0.89</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB(R-t)</td>
                <td>0.85</td>
                <td>0.94</td>
                <td>0.89</td>
                <td colspan="2">0.85</td>
                <td>1.00</td>
                <td>0.92</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC(R-a)</td>
                <td>0.71</td>
                <td>0.79</td>
                <td>0.75</td>
                <td colspan="2">0.92</td>
                <td>0.99</td>
                <td>0.95</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC(R-t)</td>
                <td>0.88</td>
                <td>0.98</td>
                <td>0.93</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC(R-a)</td>
                <td>0.80</td>
                <td>0.86</td>
                <td>0.83</td>
                <td colspan="2">0.89</td>
                <td>1.00</td>
                <td>0.94</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC(R-t)</td>
                <td>0.90</td>
                <td>0.98</td>
                <td>0.94</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC(R-a)</td>
                <td>0.68</td>
                <td>0.74</td>
                <td>0.71</td>
                <td colspan="2">0.90</td>
                <td>1.00</td>
                <td>0.95</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC(R-t)</td>
                <td>0.90</td>
                <td>0.98</td>
                <td>0.94</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Trained on the CRF<sup>d</sup> predictions</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>A</td>
                <td>0.84</td>
                <td>0.89</td>
                <td>0.87</td>
                <td colspan="2">0.74</td>
                <td>0.82</td>
                <td>0.78</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>B</td>
                <td>0.74</td>
                <td>0.79</td>
                <td>0.77</td>
                <td colspan="2">0.82</td>
                <td>0.98</td>
                <td>0.89</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>C</td>
                <td>0.86</td>
                <td>0.95</td>
                <td>0.90</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB(R-a)</td>
                <td>0.72</td>
                <td>0.76</td>
                <td>0.73</td>
                <td colspan="2">0.83</td>
                <td>0.92</td>
                <td>0.87</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB(R-t)</td>
                <td>0.87</td>
                <td>0.93</td>
                <td>0.90</td>
                <td colspan="2">0.84</td>
                <td>0.98</td>
                <td>0.90</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC(R-a)</td>
                <td>0.72</td>
                <td>0.78</td>
                <td>0.75</td>
                <td colspan="2">0.92</td>
                <td>0.99</td>
                <td>0.95</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC(R-t)</td>
                <td>0.87</td>
                <td>0.97</td>
                <td>0.92</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC(R-a)</td>
                <td>0.80</td>
                <td>0.86</td>
                <td>0.83</td>
                <td colspan="2">0.89</td>
                <td>1.00</td>
                <td>0.94</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC(R-t)</td>
                <td>0.89</td>
                <td>0.95</td>
                <td>0.92</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC(R-a)</td>
                <td>0.71</td>
                <td>0.76</td>
                <td>0.73</td>
                <td colspan="2">0.89</td>
                <td>0.99</td>
                <td>0.93</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC(R-t)</td>
                <td>0.90</td>
                <td>0.95</td>
                <td>0.92</td>
                <td colspan="2">—</td>
                <td>—</td>
                <td>—</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>RBF: radial basis function.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>Not applicable.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>CRF: conditional random field.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Support ratio of diagnosis classes across models and 3 decision functions for question 2 classification tasks.</p>
          </caption>
          <graphic xlink:href="jmir_v24i2e30397_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Question 2: microaveraged F<sub>1</sub> score results for different models and decision functions. Here, A, B, and C are 3 medical doctors (abbreviated as Dr) who took part in the experiment.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Model</td>
                <td colspan="3">Symptom-modifier vector</td>
                <td colspan="3">Symptom-only vector</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>LE</td>
                <td>LT</td>
                <td>NEQ</td>
                <td>LE</td>
                <td>LT</td>
                <td>NEQ</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Trained on the ground truth</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>A</td>
                <td>0.72</td>
                <td>0.61</td>
                <td>0.78</td>
                <td>0.70</td>
                <td>0.59</td>
                <td>0.74</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>B</td>
                <td>0.78</td>
                <td>0.61</td>
                <td>0.76</td>
                <td>0.78</td>
                <td>0.62</td>
                <td>0.77</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>C</td>
                <td>0.87</td>
                <td>0.75</td>
                <td>0.87</td>
                <td>0.88</td>
                <td>0.75</td>
                <td>0.87</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB</td>
                <td>0.72</td>
                <td>0.66</td>
                <td>0.74</td>
                <td>0.74</td>
                <td>0.65</td>
                <td>0.75</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC</td>
                <td>0.84</td>
                <td>0.76</td>
                <td>0.84</td>
                <td>0.85</td>
                <td>0.79</td>
                <td>0.86</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC</td>
                <td>0.81</td>
                <td>0.73</td>
                <td>0.81</td>
                <td>0.83</td>
                <td>0.74</td>
                <td>0.83</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC</td>
                <td>0.74</td>
                <td>0.67</td>
                <td>0.76</td>
                <td>0.75</td>
                <td>0.67</td>
                <td>0.77</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Trained on the CRF<sup>a</sup> predictions</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>A</td>
                <td>0.68</td>
                <td>0.64</td>
                <td>0.76</td>
                <td>0.50</td>
                <td>0.79</td>
                <td>0.74</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>B</td>
                <td>0.76</td>
                <td>0.64</td>
                <td>0.77</td>
                <td>0.78</td>
                <td>0.57</td>
                <td>0.74</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>C</td>
                <td>0.86</td>
                <td>0.75</td>
                <td>0.87</td>
                <td>0.87</td>
                <td>0.74</td>
                <td>0.86</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AB</td>
                <td>0.70</td>
                <td>0.65</td>
                <td>0.73</td>
                <td>0.71</td>
                <td>0.66</td>
                <td>0.74</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BC</td>
                <td>0.83</td>
                <td>0.76</td>
                <td>0.83</td>
                <td>0.85</td>
                <td>0.78</td>
                <td>0.86</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AC</td>
                <td>0.80</td>
                <td>0.74</td>
                <td>0.82</td>
                <td>0.80</td>
                <td>0.73</td>
                <td>0.81</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ABC</td>
                <td>0.72</td>
                <td>0.69</td>
                <td>0.76</td>
                <td>0.74</td>
                <td>0.69</td>
                <td>0.77</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>CRF: conditional random field.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study demonstrates the potential to triage and diagnose COVID-19 patients from their social media posts. We presented a proof-of-concept system to predict a patient’s health state by building machine learning models from their narrative. The models were trained in 2 ways: using (1) ground-truth labels and (2) predictions obtained from the NLP pipeline. Trained models are always tested on ground-truth labels. We obtained good performances in both cases, which indicates that an automated NLP pipeline could be used to triage and diagnose patients from their narrative; see the Evaluation Outcomes subsection in the Results section. In general, health professionals and researchers could deploy triage models to determine the severity of COVID-19 cases in the population and diagnostic models to gauge the prevalence of the pandemic.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>To quantify the important predictive features in the training set, we experimented with COVID-19 diagnosis using linear kernel SVR regression. More specifically, we used the symptom-only vector representation constructed from the ground truth. We summed feature weights for each S<sub>i</sub> in &#60;S<sub>0</sub>, S<sub>1</sub>, . . . , S<sub>n</sub>&#62; from the 7 models and the 3 decision functions; see the Methods section. The features were then mapped to the categories found in the Twitter COVID-19 lexicon compiled by Sarker et al [<xref ref-type="bibr" rid="ref4">4</xref>]. The top 5 important features in our data set were <italic>cough</italic>, <italic>anosmia/ageusia</italic>, <italic>dyspnea</italic>, <italic>pyrexia</italic>, and <italic>fatigue</italic>. Mizrahi et al [<xref ref-type="bibr" rid="ref17">17</xref>] quoted 4 of these symptoms as the most prevalent coronavirus symptoms, strongly correlating with our findings.</p>
        <p>To compare our importance ranking with that of Sarker et al’s [<xref ref-type="bibr" rid="ref4">4</xref>] frequent categories, we compiled the corresponding frequencies of our 5 most important symptoms. Normalized weights and frequencies were then plotted in <xref rid="figure6" ref-type="fig">Figure 6</xref>. The top-left stacked bar chart compares our 5 most important features with Sarker et al’s [<xref ref-type="bibr" rid="ref4">4</xref>] frequencies. Cough was the most important symptom from our data set, where it was the second-most frequent. Anosmia/ageusia ranked second in our importance list, while it was seventh in the most frequent list. Pyrexia came first in the frequency list and fourth in the importance list.</p>
        <p>The top-right chart in <xref rid="figure6" ref-type="fig">Figure 6</xref> shows a comparison between Sarker et al’s [<xref ref-type="bibr" rid="ref4">4</xref>] frequency ranking and our importance ranking. Here, we selected the top 5 most frequent symptoms from Sarker et al’s [<xref ref-type="bibr" rid="ref4">4</xref>] frequency list and normalized them. These are <italic>pyrexia</italic>, <italic>cough</italic>, <italic>body ache</italic>, <italic>fatigue</italic>, and <italic>headache</italic>. We took the corresponding importance weights of these symptoms and plotted them in a stacked bar chart. Here, headache ranked 22<sup>nd</sup> in our importance ranking, while it was 5<sup>th</sup> in the frequency ranking. We found a large difference between the 2 rankings, implying that the top-most frequent symptoms are not necessarily the most important ones.</p>
        <p>Next, we compared our most important feature weights with our data set’s frequency ranking using the methods described earlier. From the bottom-left stacked bar chart of <xref rid="figure6" ref-type="fig">Figure 6</xref>, we observed that anosmia/ageusia ranked relatively low in the frequency ranking (ie, 11<sup>th</sup>). As in Sarker et al’s [<xref ref-type="bibr" rid="ref4">4</xref>] ranking, cough came second in our data set’s frequency ranking.</p>
        <p>Finally, the bottom-right chart in <xref rid="figure6" ref-type="fig">Figure 6</xref> refers to the comparison between our data set’s frequency and importance rankings for the corresponding symptoms. We observed that anxiety ranked 4<sup>th</sup> in the frequency list, while it was low (ie, 23<sup>rd</sup>) in the importance ranking.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Feature comparison between our most important features and Sarker et al’s [<xref ref-type="bibr" rid="ref4">4</xref>] most frequent symptoms (top row) and between our most important features and our most frequent symptoms (bottom row). The feature importance rankings are obtained from an SVM linear kernel using the symptom-only vector representation. SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="jmir_v24i2e30397_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>It is worth reiterating that social media posts, which are known to be noisy, are not on a par with the consultation that a patient would have with a doctor. We stress that the aim of this study is to extract useful information at a population level, rather than to provide an actionable decision for an individual via social media posts. Our manually annotated data set has 2 main limitations. First, having only 3 experts limited the quality of our labeling, although we deem this study to be a proof of concept. A larger number of experts, including more senior doctors, would be beneficial in a follow-up study. Second, the robustness of our results could be further improved by both increasing the size of our data set and introducing posts from several alternate sources. Given that the posts come from social media, it is not clear whether the results could be used as such in a diagnostic system, without combining them with actual consultations. However, it is worth noting that medical social media, such as the posts we used herein, may uncover novel information regarding COVID-19.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>The coronavirus pandemic has drawn a spotlight on the need to develop automated processes to provide additional information to researchers, health professionals, and decision makers. Medical social media comprises a rich resource of timely information that could fit this purpose. We have demonstrated that it is possible to take an approach that aims at the detection of COVID-19 using an automated triage and diagnosis system in order to augment public health surveillance systems, despite the heterogeneous nature of typical social media posts. The outputs from such an approach could be used to indicate the severity and estimate the prevalence of the disease in the population.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BPOC</term>
          <def>
            <p>body part, organ, or organ component</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CT</term>
          <def>
            <p>computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">GP</term>
          <def>
            <p>general physician</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">GTP</term>
          <def>
            <p>ground-truth probability</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">RB</term>
          <def>
            <p>rule based</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">RBF</term>
          <def>
            <p>radial basis function</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SVR</term>
          <def>
            <p>support vector regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">SYM</term>
          <def>
            <p>symptoms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">XGB</term>
          <def>
            <p>gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="con">
        <p>All authors were involved in the design of the work. The first author wrote the code. The first 3 authors drafted the paper, and all authors critically revised the article.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <source>Overview: Coronavirus (COVID-19)</source>
          <access-date>2021-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://web.archive.org/web/20200316223405/https://www.nhs.uk/conditions/coronavirus-covid-19/">https://web.archive.org/web/20200316223405/https://www.nhs.uk/conditions/coronavirus-covid-19/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obeid</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Meystre</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Heider</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>O'Bryan</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Lenert</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>An artificial intelligence approach to COVID-19 infection risk assessment in virtual visits: a case report</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>08</month>
          <day>01</day>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>1321</fpage>
          <lpage>1325</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32449766"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa105</pub-id>
          <pub-id pub-id-type="medline">32449766</pub-id>
          <pub-id pub-id-type="pii">5843795</pub-id>
          <pub-id pub-id-type="pmcid">PMC7313981</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwab</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>DuMont Schütte</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dietz</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Clinical predictive models for COVID-19: systematic study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>06</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e21439</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e21439/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21439</pub-id>
          <pub-id pub-id-type="medline">32976111</pub-id>
          <pub-id pub-id-type="pii">v22i10e21439</pub-id>
          <pub-id pub-id-type="pmcid">PMC7541040</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lakamana</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hogg-Bremer</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Garadi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Self-reported COVID-19 symptoms on Twitter: an analysis and a research resource</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>08</month>
          <day>01</day>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>1310</fpage>
          <lpage>1315</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32620975"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa116</pub-id>
          <pub-id pub-id-type="medline">32620975</pub-id>
          <pub-id pub-id-type="pii">5867237</pub-id>
          <pub-id pub-id-type="pmcid">PMC7337747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shia</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Prediction of number of cases of 2019 novel coronavirus (COVID-19) using social media search index</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2020</year>
          <month>03</month>
          <day>31</day>
          <volume>17</volume>
          <issue>7</issue>
          <fpage>2365</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph17072365"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph17072365</pub-id>
          <pub-id pub-id-type="medline">32244425</pub-id>
          <pub-id pub-id-type="pii">ijerph17072365</pub-id>
          <pub-id pub-id-type="pmcid">PMC7177617</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aramaki</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Maskawa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Morita</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Twitter catches the flu: detecting influenza epidemics using Twitter</article-title>
          <source>Proc EMNLP</source>
          <year>2011</year>
          <fpage>1568</fpage>
          <lpage>1576</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Langley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Avram</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Prediction of influenza-like illness based on the improved artificial tree algorithm and artificial neural network</article-title>
          <source>Sci Rep</source>
          <year>2018</year>
          <month>03</month>
          <day>20</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>4895</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-018-23075-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-018-23075-1</pub-id>
          <pub-id pub-id-type="medline">29559649</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-018-23075-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5861130</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Likert scales, levels of measurement and the "laws" of statistics</article-title>
          <source>Adv Health Sci Educ Theory Pract</source>
          <year>2010</year>
          <month>12</month>
          <day>10</day>
          <volume>15</volume>
          <issue>5</issue>
          <fpage>625</fpage>
          <lpage>632</lpage>
          <pub-id pub-id-type="doi">10.1007/s10459-010-9222-y</pub-id>
          <pub-id pub-id-type="medline">20146096</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Drucker</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Burges</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Smola</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support vector regression machines</article-title>
          <year>1996</year>
          <conf-name>NIPS'96: Proceedings of the 9th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 1996</conf-date>
          <conf-loc>Denver, CO</conf-loc>
          <fpage>2</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1109/iconip.2002.1198219</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marsland</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>Machine Learning: An Algorithmic Perspective. 2nd ed</source>
          <year>2014</year>
          <publisher-loc>Boca Raton, FL</publisher-loc>
          <publisher-name>Chapman &amp; Hall/CRC</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arrieta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>García-Prado</surname>
              <given-names>Ariadna</given-names>
            </name>
            <name name-style="western">
              <surname>González</surname>
              <given-names>Paula</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto-Prades</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Risk attitudes in medical decisions for others: an experimental approach</article-title>
          <source>Health Econ</source>
          <year>2017</year>
          <month>12</month>
          <day>29</day>
          <volume>26 Suppl 3</volume>
          <fpage>97</fpage>
          <lpage>113</lpage>
          <pub-id pub-id-type="doi">10.1002/hec.3628</pub-id>
          <pub-id pub-id-type="medline">29285873</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>An introduction to conditional random fields</article-title>
          <source>Found Trends Mach Learn</source>
          <year>2012</year>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>267</fpage>
          <lpage>373</lpage>
          <pub-id pub-id-type="doi">10.1561/2200000013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bach</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Badaskar</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A review of relation extraction</article-title>
          <source>Lit Rev Lang Stat II</source>
          <year>2007</year>
          <volume>2</volume>
          <fpage>15</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Edo-Osagie</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>De La Iglesia</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lake</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Edeghere</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>A scoping review of the use of Twitter for public health research</article-title>
          <source>Comput Biol Med</source>
          <year>2020</year>
          <month>07</month>
          <volume>122</volume>
          <fpage>103770</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32502758"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2020.103770</pub-id>
          <pub-id pub-id-type="medline">32502758</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(20)30142-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7229729</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brammertz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Herpich</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Südkamp</surname>
              <given-names>Norbert</given-names>
            </name>
            <name name-style="western">
              <surname>Hinterseer</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>App-based tracking of self-reported COVID-19 symptoms: analysis of questionnaire data</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>09</month>
          <day>09</day>
          <volume>22</volume>
          <issue>9</issue>
          <fpage>e21956</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/9/e21956/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21956</pub-id>
          <pub-id pub-id-type="medline">32791493</pub-id>
          <pub-id pub-id-type="pii">v22i9e21956</pub-id>
          <pub-id pub-id-type="pmcid">PMC7480999</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Menni</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Valdes</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Freidin</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Sudre</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Drew</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Ganesh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Varsavsky</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cardoso</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>El-Sayed Moustafa</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Visconti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hysi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>RCE</given-names>
            </name>
            <name name-style="western">
              <surname>Mangino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Falchi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ourselin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Steves</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Spector</surname>
              <given-names>TD</given-names>
            </name>
          </person-group>
          <article-title>Real-time tracking of self-reported symptoms to predict potential COVID-19</article-title>
          <source>Nat Med</source>
          <year>2020</year>
          <month>07</month>
          <day>11</day>
          <volume>26</volume>
          <issue>7</issue>
          <fpage>1037</fpage>
          <lpage>1040</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32393804"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41591-020-0916-2</pub-id>
          <pub-id pub-id-type="medline">32393804</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-020-0916-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7751267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mizrahi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Shilo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rossman</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kalkstein</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Barer</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Keshet</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shamir-Stein</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shalev</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Zohar</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Chodick</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Segal</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Longitudinal symptom dynamics of COVID-19 infection</article-title>
          <source>Nat Commun</source>
          <year>2020</year>
          <month>12</month>
          <day>04</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>6208</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-020-20053-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-020-20053-y</pub-id>
          <pub-id pub-id-type="medline">33277494</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-020-20053-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC7718370</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Balasubramani</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Nowalk</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Eng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Urbanski</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>McLean</surname>
              <given-names>HQ</given-names>
            </name>
            <name name-style="western">
              <surname>Belongia</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Monto</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Malosh</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Gaglani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Clipper</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Flannery</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wisniewski</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>Classification and Regression Tree (CART) analysis to predict influenza in primary care patients</article-title>
          <source>BMC Infect Dis</source>
          <year>2016</year>
          <month>09</month>
          <day>22</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>503</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcinfectdis.biomedcentral.com/articles/10.1186/s12879-016-1839-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12879-016-1839-x</pub-id>
          <pub-id pub-id-type="medline">27659721</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12879-016-1839-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC5034457</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brinati</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Campagner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrari</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Locatelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Banfi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cabitza</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Detection of COVID-19 infection from routine blood exams with machine learning: a feasibility study</article-title>
          <source>J Med Syst</source>
          <year>2020</year>
          <month>07</month>
          <day>01</day>
          <volume>44</volume>
          <issue>8</issue>
          <fpage>135</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32607737"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10916-020-01597-4</pub-id>
          <pub-id pub-id-type="medline">32607737</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10916-020-01597-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC7326624</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wynants</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Van Calster</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Heinze</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Schuit</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bonten</surname>
              <given-names>MMJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dahly</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Damen</surname>
              <given-names>JAA</given-names>
            </name>
            <name name-style="western">
              <surname>Debray</surname>
              <given-names>TPA</given-names>
            </name>
            <name name-style="western">
              <surname>de Jong</surname>
              <given-names>VMT</given-names>
            </name>
            <name name-style="western">
              <surname>De Vos</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dhiman</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Haller</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Harhay</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Henckaerts</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Heus</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kammer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kreuzberger</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lohmann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Luijken</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>McLernon</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Andaur Navarro</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Reitsma</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Sergeant</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Skoetz</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Smits</surname>
              <given-names>LJM</given-names>
            </name>
            <name name-style="western">
              <surname>Snell</surname>
              <given-names>KIE</given-names>
            </name>
            <name name-style="western">
              <surname>Sperrin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Spijker</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Takada</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tzoulaki</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>van Kuijk</surname>
              <given-names>SMJ</given-names>
            </name>
            <name name-style="western">
              <surname>van Bussel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>van der Horst</surname>
              <given-names>ICC</given-names>
            </name>
            <name name-style="western">
              <surname>van Royen</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Verbakel</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Wallisch</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wilkinson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wolff</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hooft</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>KGM</given-names>
            </name>
            <name name-style="western">
              <surname>van Smeden</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Prediction models for diagnosis and prognosis of covid-19: systematic review and critical appraisal</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <month>04</month>
          <day>07</day>
          <volume>369</volume>
          <fpage>m1328</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&amp;pmid=32265220"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.m1328</pub-id>
          <pub-id pub-id-type="medline">32265220</pub-id>
          <pub-id pub-id-type="pmcid">PMC7222643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Judson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Odisho</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Neinstein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Moriarty</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gleason</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Intinarelli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzales</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Rapid design and implementation of an integrated patient self-triage and self-scheduling tool for COVID-19</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>06</month>
          <day>01</day>
          <volume>27</volume>
          <issue>6</issue>
          <fpage>860</fpage>
          <lpage>866</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32267928"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa051</pub-id>
          <pub-id pub-id-type="medline">32267928</pub-id>
          <pub-id pub-id-type="pii">5817825</pub-id>
          <pub-id pub-id-type="pmcid">PMC7184478</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A COVID-19 risk assessment decision support system for general practitioners: design and development study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>29</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e19786</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e19786/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19786</pub-id>
          <pub-id pub-id-type="medline">32540845</pub-id>
          <pub-id pub-id-type="pii">v22i6e19786</pub-id>
          <pub-id pub-id-type="pmcid">PMC7332157</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <article-title>Diagnosis of COVID-19 and its clinical spectrum: AI and data science supporting clinical decision (from 28th Mar to 3rd Apr)</article-title>
          <source>Einstein Data4u</source>
          <access-date>2021-02-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.kaggle.com/einsteindata4u/covid19">https://www.kaggle.com/einsteindata4u/covid19</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zuo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Clinical and laboratory predictors of in-hospital mortality in patients with coronavirus disease-2019: a cohort study in Wuhan, China</article-title>
          <source>Clin Infect Dis</source>
          <year>2020</year>
          <month>11</month>
          <day>19</day>
          <volume>71</volume>
          <issue>16</issue>
          <fpage>2079</fpage>
          <lpage>2088</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32361723"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/cid/ciaa538</pub-id>
          <pub-id pub-id-type="medline">32361723</pub-id>
          <pub-id pub-id-type="pii">5828281</pub-id>
          <pub-id pub-id-type="pmcid">PMC7197616</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Somani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Russak</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>De Freitas</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhry</surname>
              <given-names>FF</given-names>
            </name>
            <name name-style="western">
              <surname>Paranjpe</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Richter</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Beckmann</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Naik</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Timsina</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lala</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paranjpe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Golden</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Danieletto</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>O'Reilly</surname>
              <given-names>PF</given-names>
            </name>
            <name name-style="western">
              <surname>Huckins</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kovatch</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Finkelstein</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Argulian</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kasarskis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Percha</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Aberg</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Bagiella</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Horowitz</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Nestler</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Schadt</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Cordon-Cardo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fuster</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Charney</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Bottinger</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Narula</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fayad</surname>
              <given-names>ZA</given-names>
            </name>
            <name name-style="western">
              <surname>Just</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Charney</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>GN</given-names>
            </name>
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>BS</given-names>
            </name>
          </person-group>
          <article-title>Machine learning to predict mortality and critical events in a cohort of patients with COVID-19 in New York City: model development and validation</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>11</month>
          <day>06</day>
          <volume>22</volume>
          <issue>11</issue>
          <fpage>e24018</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/11/e24018/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24018</pub-id>
          <pub-id pub-id-type="medline">33027032</pub-id>
          <pub-id pub-id-type="pii">v22i11e24018</pub-id>
          <pub-id pub-id-type="pmcid">PMC7652593</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Shweta</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Murugadoss</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Awasthi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatakrishnan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bade</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Augmented curation of clinical notes from a massive EHR system reveals symptoms of impending COVID-19 diagnosis</article-title>
          <source>Elife</source>
          <year>2020</year>
          <volume>9</volume>
          <fpage>e58227</fpage>
          <pub-id pub-id-type="doi">10.7554/elife.58227</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Abu-El-Rub</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Manion</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rouhizadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 SignSym: a fast adaptation of a general clinical NLP tool to identify and normalize COVID-19 signs and symptoms to OMOP common data model</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>06</month>
          <day>12</day>
          <volume>28</volume>
          <issue>6</issue>
          <fpage>1275</fpage>
          <lpage>1283</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33674830"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab015</pub-id>
          <pub-id pub-id-type="medline">33674830</pub-id>
          <pub-id pub-id-type="pii">6155732</pub-id>
          <pub-id pub-id-type="pmcid">PMC7989301</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>López-Úbeda</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Díaz-Galiano</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Martín-Noguerol</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Luna</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ureña-López</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Martín-Valdivia</surname>
              <given-names>MT</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 detection in radiological text reports integrating entity recognition</article-title>
          <source>Comput Biol Med</source>
          <year>2020</year>
          <month>12</month>
          <volume>127</volume>
          <fpage>104066</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33130435"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2020.104066</pub-id>
          <pub-id pub-id-type="medline">33130435</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(20)30397-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC7577869</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>Database issue</issue>
          <fpage>D267</fpage>
          <lpage>D270</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>Proc NIPS</source>
          <year>2013</year>
          <volume>2</volume>
          <fpage>3111</fpage>
          <lpage>3119</lpage>
          <pub-id pub-id-type="doi">10.5040/9781474284974.00399</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Egton Medical Information Systems Limited</collab>
          </person-group>
          <article-title>Coronavirus (COVID-19)</article-title>
          <source>Patient</source>
          <access-date>2022-02-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://patient.info/forums/discuss/browse/coronavirus-covid-19--4541">https://patient.info/forums/discuss/browse/coronavirus-covid-19--4541</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Vet</surname>
              <given-names>HCW</given-names>
            </name>
            <name name-style="western">
              <surname>Mokkink</surname>
              <given-names>LB</given-names>
            </name>
            <name name-style="western">
              <surname>Terwee</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Hoekstra</surname>
              <given-names>OS</given-names>
            </name>
            <name name-style="western">
              <surname>Knol</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Clinicians are right not to like Cohen's κ</article-title>
          <source>BMJ</source>
          <year>2013</year>
          <month>04</month>
          <day>12</day>
          <volume>346</volume>
          <issue>apr12 1</issue>
          <fpage>f2125</fpage>
          <lpage>f2125</lpage>
          <pub-id pub-id-type="doi">10.1136/bmj.f2125</pub-id>
          <pub-id pub-id-type="medline">23585065</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feinstein</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Cicchetti</surname>
              <given-names>DV</given-names>
            </name>
          </person-group>
          <article-title>High agreement but low kappa: I. The problems of two paradoxes</article-title>
          <source>J Clin Epidemiol</source>
          <year>1990</year>
          <month>01</month>
          <volume>43</volume>
          <issue>6</issue>
          <fpage>543</fpage>
          <lpage>549</lpage>
          <pub-id pub-id-type="doi">10.1016/0895-4356(90)90158-l</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gwet</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Computing inter-rater reliability and its variance in the presence of high agreement</article-title>
          <source>Br J Math Stat Psychol</source>
          <year>2008</year>
          <volume>61</volume>
          <issue>Pt 1</issue>
          <fpage>29</fpage>
          <lpage>48</lpage>
          <pub-id pub-id-type="doi">10.1348/000711006x126600</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wongpakaran</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wongpakaran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wedding</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gwet</surname>
              <given-names>KL</given-names>
            </name>
          </person-group>
          <article-title>A comparison of Cohen's kappa and Gwet's AC1 when calculating inter-rater reliability coefficients: a study conducted with personality disorder samples</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2013</year>
          <month>04</month>
          <day>29</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>61</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/1471-2288-13-61"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2288-13-61</pub-id>
          <pub-id pub-id-type="medline">23627889</pub-id>
          <pub-id pub-id-type="pii">1471-2288-13-61</pub-id>
          <pub-id pub-id-type="pmcid">PMC3643869</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Landis</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Koch</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>The measurement of observer agreement for categorical data</article-title>
          <source>Biometrics</source>
          <year>1977</year>
          <month>03</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>159</fpage>
          <lpage>174</lpage>
          <pub-id pub-id-type="medline">843571</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cunningham</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Maynard</surname>
              <given-names>DK</given-names>
            </name>
          </person-group>
          <source>Text Processing with GATE (Version 6)</source>
          <year>2011</year>
          <publisher-loc>CA</publisher-loc>
          <publisher-name>Gateway Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Levene</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Learning structured medical information from social media</article-title>
          <source>J Biomed Inform</source>
          <year>2020</year>
          <month>10</month>
          <volume>110</volume>
          <fpage>103568</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(20)30198-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2020.103568</pub-id>
          <pub-id pub-id-type="medline">32942027</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(20)30198-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CH</given-names>
            </name>
          </person-group>
          <source>Foundations of Statistical Natural Language Processing. 4th ed</source>
          <year>2001</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <source>python-crfsuite</source>
          <access-date>2022-02-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://python-crfsuite.readthedocs.io/en/latest/">https://python-crfsuite.readthedocs.io/en/latest/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Okazaki</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>CRFsuite: A Fast Implementation of Conditional Random Fields (CRFs)</source>
          <access-date>2022-02-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.chokkan.org/software/crfsuite/">http://www.chokkan.org/software/crfsuite/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>LIBSVM</article-title>
          <source>ACM Trans Intell Syst Technol</source>
          <year>2011</year>
          <month>04</month>
          <day>01</day>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1145/1961189.1961199</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Eickenberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gervais</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mueller</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kossaifi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Machine learning for neuroimaging with scikit-learn</article-title>
          <source>Front Neuroinform</source>
          <year>2014</year>
          <volume>8</volume>
          <fpage>14</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fninf.2014.00014"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fninf.2014.00014</pub-id>
          <pub-id pub-id-type="medline">24600388</pub-id>
          <pub-id pub-id-type="pmcid">PMC3930868</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <source>Support Vector Machines</source>
          <access-date>2022-02-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scikit-learn.org/stable/modules/svm.html">https://scikit-learn.org/stable/modules/svm.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mukherjee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chapelle</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Pontil</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Poggio</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Feature selection for SVMs</article-title>
          <source>Adv Neural Inf Process Syst</source>
          <year>2000</year>
          <volume>13</volume>
          <fpage>668</fpage>
          <lpage>674</lpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
