<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i6e26892</article-id>
      <article-id pub-id-type="pmid">34128811</article-id>
      <article-id pub-id-type="doi">10.2196/26892</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Constructing High-Fidelity Phenotype Knowledge Graphs for Infectious Diseases With a Fine-Grained Semantic Information Model: Development and Usability Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pacheco</surname>
            <given-names>Jennifer</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Seebode</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Benítez-Andrades</surname>
            <given-names>José Alberto</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Deng</surname>
            <given-names>Lizong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9314-262X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Luming</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2468-8631</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Tao</given-names>
          </name>
          <degrees>MM</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7521-4295</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Mi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6732-7970</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Shicheng</given-names>
          </name>
          <degrees>MM</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5893-8822</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>Taijiao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Center of Systems Medicine</institution>
            <institution>Institute of Basic Medical Sciences</institution>
            <institution>Chinese Academy of Medical Sciences &amp; Peking Union Medical College</institution>
            <addr-line>#5 Dong Dan San Tiao</addr-line>
            <addr-line>Dongcheng District</addr-line>
            <addr-line>Beijing, 100005</addr-line>
            <country>China</country>
            <phone>86 051262873781</phone>
            <email>taijiao@ibms.pumc.edu.cn</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6280-6347</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Center of Systems Medicine</institution>
        <institution>Institute of Basic Medical Sciences</institution>
        <institution>Chinese Academy of Medical Sciences &amp; Peking Union Medical College</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Suzhou Institute of Systems Medicine</institution>
        <addr-line>Suzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Jiangsu Institute of Clinical Immunology</institution>
        <institution>Jiangsu Key Laboratory of Clinical Immunology</institution>
        <institution>The First Affiliated Hospital of Soochow University</institution>
        <addr-line>Suzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Guangzhou Laboratory</institution>
        <addr-line>Guangzhou</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Taijiao Jiang <email>taijiao@ibms.pumc.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>6</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>6</issue>
      <elocation-id>e26892</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>1</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>20</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>1</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>6</day>
          <month>5</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Lizong Deng, Luming Chen, Tao Yang, Mi Liu, Shicheng Li, Taijiao Jiang. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 15.06.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/6/e26892" xlink:type="simple"/>
      <related-article related-article-type="correction-forward" xlink:title="This is a corrected version. See correction statement in:" xlink:href="https://www.jmir.org/2021/7/e31481" vol="23" page="e31481"> </related-article>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Phenotypes characterize the clinical manifestations of diseases and provide important information for diagnosis. Therefore, the construction of phenotype knowledge graphs for diseases is valuable to the development of artificial intelligence in medicine. However, phenotype knowledge graphs in current knowledge bases such as WikiData and DBpedia are coarse-grained knowledge graphs because they only consider the core concepts of phenotypes while neglecting the details (attributes) associated with these phenotypes.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>To characterize the details of disease phenotypes for clinical guidelines, we proposed a fine-grained semantic information model named PhenoSSU (semantic structured unit of phenotypes).</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>PhenoSSU is an “entity-attribute-value” model by its very nature, and it aims to capture the full semantic information underlying phenotype descriptions with a series of attributes and values. A total of 193 clinical guidelines for infectious diseases from Wikipedia were selected as the study corpus, and 12 attributes from SNOMED-CT were introduced into the PhenoSSU model based on the co-occurrences of phenotype concepts and attribute values. The expressive power of the PhenoSSU model was evaluated by analyzing whether PhenoSSU instances could capture the full semantics underlying the descriptions of the corresponding phenotypes. To automatically construct fine-grained phenotype knowledge graphs, a hybrid strategy that first recognized phenotype concepts with the MetaMap tool and then predicted the attribute values of phenotypes with machine learning classifiers was developed.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Fine-grained phenotype knowledge graphs of 193 infectious diseases were manually constructed with the BRAT annotation tool. A total of 4020 PhenoSSU instances were annotated in these knowledge graphs, and 3757 of them (89.5%) were found to be able to capture the full semantics underlying the descriptions of the corresponding phenotypes listed in clinical guidelines. By comparison, other information models, such as the clinical element model and the HL7 fast health care interoperability resource model, could only capture the full semantics underlying 48.4% (2034/4020) and 21.8% (914/4020) of the descriptions of phenotypes listed in clinical guidelines, respectively. The hybrid strategy achieved an F1-score of 0.732 for the subtask of phenotype concept recognition and an average weighted accuracy of 0.776 for the subtask of attribute value prediction.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>PhenoSSU is an effective information model for the precise representation of phenotype knowledge for clinical guidelines, and machine learning can be used to improve the efficiency of constructing PhenoSSU-based knowledge graphs. Our work will potentially shift the focus of medical knowledge engineering from a coarse-grained level to a more fine-grained level.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>knowledge graph</kwd>
        <kwd>knowledge granularity</kwd>
        <kwd>machine learning</kwd>
        <kwd>high-fidelity phenotyping</kwd>
        <kwd>phenotyping</kwd>
        <kwd>phenotype</kwd>
        <kwd>semantic</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>When people are sick, their bodies present a series of observable or perceptible abnormalities, which are called phenotypes. In medicine, the phenotype concept covers signs and symptoms, laboratory test results, and imaging findings [<xref ref-type="bibr" rid="ref1">1</xref>]. Phenotypes characterize the clinical manifestations of diseases, which provide important clues for diagnoses. Knowledge about disease phenotypes is usually documented as free text in medical textbooks or clinical guidelines, and such knowledge forms are hard for computers to use. Therefore, it is essential to transform phenotype knowledge into a machine-understandable format to facilitate the development of automated systems that could improve health care [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
      <p>To date, many structured knowledge bases, such as WikiData [<xref ref-type="bibr" rid="ref3">3</xref>], MalaCards [<xref ref-type="bibr" rid="ref4">4</xref>], and DBpedia [<xref ref-type="bibr" rid="ref5">5</xref>], have been constructed for disease phenotypes. In these knowledge bases, the phenotype knowledge of a disease is represented as a list of phenotype concepts or terms (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). However, such a concept-based representation only focuses on the presence or absence of a phenotype but neglects its contextual properties [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. The description “sudden, severe abdominal pain in the lower right abdomen,” for example, names three attributes of abdominal pain, including the onset pattern (sudden), severity (severe), and quadrant pattern (lower right abdomen). These attributes are valuable for diagnosis but missing in the provided concept-based representation. Due to the neglect of phenotypic details, current phenotype knowledge bases only characterize disease manifestations at a very coarse-grained level [<xref ref-type="bibr" rid="ref8">8</xref>], which is considered to be “sloppy and imprecise” [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>].</p>
      <p>To precisely represent phenotype knowledge in clinical guidelines, it is necessary to introduce fine-grained semantic information models [<xref ref-type="bibr" rid="ref11">11</xref>], which consider phenotypes and attributes simultaneously. The currently available semantic models for representing phenotype information include but are not limited to clinical element models (CEMs) [<xref ref-type="bibr" rid="ref12">12</xref>], the Health Level Seven fast health care interoperability resource (FHIR) model [<xref ref-type="bibr" rid="ref13">13</xref>], and the clinical quality language model [<xref ref-type="bibr" rid="ref14">14</xref>]. All these models can be viewed as standard entity-attribute-value structures, which represent phenotype information with sufficient details by using various attributes and qualifier values. For example, a CEM model considers 17 attributes associated with phenotypes, such as phenotype severity, laterality, and duration. Although semantic information models such as CEM and FHIR have defined many attributes for phenotypes, it should be noted that these attributes are mainly designed for recording phenotypic information in electronic medical records. However, the logic underlying phenotypic descriptions in clinical guidelines is quite different from that in electronic medical records. For example, clinical guidelines usually describe the frequency of a phenotype in a population (eg, 30% of patients may have a loss of smell); however, this attribute is not defined in CEM and FHIR models. In addition to the frequencies of phenotypes, other often used attributes such as temporal patterns (eg, acute, chronic) and pain characteristics (eg, sharp, dull) are also neglected by the CEM and FHIR models. Therefore, it is necessary to optimize the attributes included in current information models to make them more suitable for representing phenotype knowledge in clinical guidelines.</p>
      <p>In this work, we aimed to develop a semantic information model that could effectively characterize the details of disease phenotypes for clinical guidelines. A semantic information model named PhenoSSU (semantic structured unit of phenotypes) was developed based on the clinical guidelines for 193 infectious diseases from Wikipedia. A total of 12 attributes were included in PhenoSSU, which characterized the details of phenotypes from various aspects. Based on PhenoSSU, we constructed fine-grained phenotype knowledge graphs for these infectious diseases. Considering the increased annotation costs associated with the introduction of PhenoSSU, we also explored the potential of machine learning for automatically recognizing PhenoSSU instances in free text. It is hoped that our work will contribute to the large-scale construction of fine-grained phenotype knowledge graphs for more diseases.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Materials</title>
        <p>We collected the clinical guidelines for 193 infectious diseases from Wikipedia [<xref ref-type="bibr" rid="ref15">15</xref>] as the corpus for constructing fine-grained phenotype knowledge graphs. In Wikipedia, the phenotypic knowledge of infectious diseases was usually buried in a section named signs and symptoms (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Although Wikipedia is created and edited by volunteers worldwide, many studies have proven the high quality of its biomedical content [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. In addition, phenotype knowledge graphs for WikiData [<xref ref-type="bibr" rid="ref3">3</xref>] and DBpedia [<xref ref-type="bibr" rid="ref5">5</xref>] were also constructed based on clinical guidelines from Wikipedia.</p>
      </sec>
      <sec>
        <title>Design of PhenoSSU</title>
        <p>PhenoSSU, by its very nature, is an entity-attribute-value model that consists of a phenotype concept along with a collection of attributes. Determining the attributes associated with various phenotypes is the key to the design of PhenoSSU. Four inclusion criteria for attributes were considered in this study:</p>
        <list list-type="bullet">
          <list-item>
            <p>Introduced attribute and value set should come from a standard medical ontology to avoid the arbitrariness of defining new attributes. Systematized Nomenclature of Medicine–Clinical Terms (SNOMED-CT) [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], one of the most comprehensive clinical terminology databases in the world, was selected as the standard for normalizing both phenotypes and attributes.</p>
          </list-item>
          <list-item>
            <p>Introduced attribute should be a modifier associated with phenotypes rather than an entity independent of phenotypes. The concepts found in SNOMED-CT were organized into 19 distinct hierarchies. Phenotypes and attributes were mainly located in the clinical finding and qualifier value hierarchies, respectively (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
          </list-item>
          <list-item>
            <p>Value set of the introduced attribute should contain categorical variables with limited dimensionality. For example, the severity attribute in SNOMED-CT contains a value set including mild, moderate, and severe. This criterion is for convenience when configuring attributes in the brat rapid annotation tool (BRAT) [<xref ref-type="bibr" rid="ref20">20</xref>] (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
          </list-item>
          <list-item>
            <p>Introduced attribute should occur at least once in the studied corpus. This criterion avoids the redundancy that would result from introducing many unused attributes.</p>
          </list-item>
        </list>
        <p>To effectively find the attributes associated with various phenotypes, we developed a simple co-occurrence–based method for attribute filtering (<xref rid="figure1" ref-type="fig">Figure 1</xref>A). Specifically, the phenotypes in the corpus were annotated with the MetaMap tool [<xref ref-type="bibr" rid="ref21">21</xref>], a state-of-the-art concept recognizer, and the values of the attributes in the corpus were annotated with the Flashtext tool [<xref ref-type="bibr" rid="ref22">22</xref>], a string-based concept recognizer. If an attribute co-occurred with any phenotypes in at least 2 sentences from the whole corpus, we selected the attribute as a candidate that was potentially associated with phenotypes. Then, we manually filtered the attributes that were truly related to phenotypes and built an initial version of PhenoSSU. The initial PhenoSSU model was optimized during the annotation process. When annotators found a new contextual property associated with phenotypes, we searched for its existence in SNOMED-CT and added the standard attribute corresponding to that contextual property into the initial PhenoSSU model.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Modeling process of PhenoSSU: (A) modeling PhenoSSU based on sentence-level co-occurrences of phenotype concepts and attribute values in clinical guidelines and (B) components of the PhenoSSU model, which consists of a phenotype concept and 12 attributes.</p>
          </caption>
          <graphic xlink:href="jmir_v23i6e26892_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The final PhenoSSU model contained 12 attributes, which could be classified into 3 categories according to the phenotypic details they characterized (<xref rid="figure1" ref-type="fig">Figure 1</xref>B): (1) details about the presence of phenotypes, including a phenotype’s assertion, frequency in a population, age specificity, sex specificity, and specificity regarding the severity of illness; (2) details about the manifestations of phenotypes, including a phenotype’s temporal pattern, severity, appearance color, and sensation characteristics; and (3) details about the spatial distributions of phenotypes, including a phenotype’s laterality, spatial pattern and quadrant pattern. The SNOMED-CT codes, definitions, and value sets of these attributes are listed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The distribution of these 12 attributes in the studied corpus is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>A.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Expressive power of PhenoSSU: (A) prevalence of the 12 attributes in the studied corpus, (B) examples of precise and imprecise representations for original phenotype descriptions with the PhenoSSU model, and (C) comparisons of precise representation percentages among different information models.</p>
          </caption>
          <graphic xlink:href="jmir_v23i6e26892_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Annotation and Normalization of PhenoSSU</title>
        <p>The annotation task of PhenoSSU can be divided into 2 steps: annotating a phenotype and annotating the attributes associated with that phenotype. Some annotation examples of different phenotype attributes defined in PhenoSSU are presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The clinical guidelines for 193 infectious diseases were annotated with the BRAT (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). To facilitate the annotation process, we preannotated the phenotypes found in clinical guidelines with the MetaMap tool. Then, two annotators (TY and SL) independently annotated the 193 clinical guidelines by following the annotation guide developed by LD and TJ. Their independent annotations were merged and visualized in the BRAT. To mark inconsistent annotations, we introduced a virtual attribute named agreement into PhenoSSU. Two independently annotated PhenoSSU models were regarded as consistent when both their phenotypes (text spans) and associated attribute values were the same. If there were inconsistencies in any part of a PhenoSSU model, the value of the agreement attribute was set to disagreement. The initial interannotator agreement at the PhenoSSU level, measured with the Cohen kappa statistic [<xref ref-type="bibr" rid="ref23">23</xref>], was 0.861. All inconsistent annotations were resolved through an adjudication process (TJ).</p>
        <p>The phenotypes annotated in BRAT were normalized with SNOMED-CT. To facilitate the normalization process, we also leveraged the MetaMap tool to obtain candidate concepts from the SNOMED-CT database and then manually selected the concept corresponding to each query phenotype. There was no need to normalize the attribute values because they were already normalized in SNOMED-CT.</p>
        <p>One aspect to note about the normalization process is the special treatment used for finding sites of phenotypes. Finding sites were not explicitly included in the PhenoSSU model because they are entities independent of phenotypes. In SNOMED-CT, there were more than 39,000 concepts of finding sites in the body structure hierarchy, and these were hard to set as a value list in the BRAT. However, finding sites are indispensable information for describing phenotypes. Therefore, we also annotated the entities of finding sites associated with phenotypes. Taking the annotation of “bleeding from the nose and gum” as an example, the entities of the phenotype (bleeding) and two finding sites (nose, gum) were annotated separately and connected with a relation curve named locate (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). If a phenotype had an associated finding site, the phenotype together with the finding site was regarded as an integral concept in the normalization process. For example, the annotation of “bleeding” associated with “nose” was normalized as “249366005|epistaxis,” which shared the same codes as the annotation of “bleeding from nose.” If a composite concept could not be normalized as a whole (eg, “rash associated with hands”), we standardized the phenotype and its corresponding finding site separately and combined them into a postcoordination expression [<xref ref-type="bibr" rid="ref24">24</xref>] (eg, “271807003|Rash”: “33712006|Skin structure of hand”; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). In summary, information about finding sites was implicitly considered an integral part of a phenotype concept rather than its attribute.</p>
      </sec>
      <sec>
        <title>Automatic Recognition of PhenoSSU</title>
        <p>The manual annotation of a PhenoSSU model is a very time-consuming process because annotators not only need to find the mention of a phenotype but also need to determine the existence of attribute trigger terms in the context surrounding a phenotype. To reduce annotation costs, it is necessary to develop algorithms for the automatic annotation of PhenoSSU models.</p>
        <p>The recognition task of PhenoSSU can be divided into 2 subtasks: phenotype concept recognition and attribute value prediction. The first subtask aims to recognize the text spans corresponding to phenotypes, and the second subtask aims to select appropriate values for 12 attributes based on a phenotype’s context.</p>
        <p>The 193 annotated clinical guidelines were randomly divided into a training set and a test set at a ratio of 6:4. For the subtask of phenotype concept recognition, we still used the MetaMap tool, which can recognize phenotype concepts based on the Metathesaurus in the Unified Medical Language System (2020AA release) [<xref ref-type="bibr" rid="ref25">25</xref>]. We optimized the parameters of the MetaMap tool based on its performance on the task of recognizing phenotype concepts in the training set (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>The subtask of attribute value prediction can be regarded as a classification problem, and two machine learning-based models were explored for this subtask. One model was based on a support vector machine (SVM), and the other model was based on a bidirectional long short-term memory (BiLSTM) neural network. For the value classification model of a specific attribute, the input was the encoded feature vectors of a phenotype’s context and the output was one of the normalized values for this attribute.</p>
        <p>We chose an SVM for developing attribute value prediction models because SVM-based models have proven their efficiency in the 2010 Informatics for Integrating Biology &amp; the Bedside/Veterans Affairs challenge [<xref ref-type="bibr" rid="ref26">26</xref>] and SemEval-2015 Task 14 [<xref ref-type="bibr" rid="ref27">27</xref>]. In the SVM-based model (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), the context of a phenotype was encoded with the existence of trigger terms (terms that indicated a normalized value [eg, “sudden onset” was the trigger term of the normalized value “acute”]) and their distances to the target phenotype [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. The SVM-based model was developed by using the scikit-learn package (version 0.23.1) [<xref ref-type="bibr" rid="ref28">28</xref>]. The parameters of the SVM-based model were optimized by using a grid search strategy [<xref ref-type="bibr" rid="ref29">29</xref>] on the training set.</p>
        <p>Inspired by recent methodology developments for the assertion status prediction task [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], we chose BiLSTM for developing attribute value prediction models. The referenced studies [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>] showed that BiLSTM and attention mechanisms could achieve better performances than other approaches when classifying assertions of medical concepts. Since assertion status prediction belonged to the task of attribute value prediction, we transferred the attention-enhanced BiLSTM model to our study. In a given BiLSTM-based model (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), the context of a phenotype was first split into 3 segments, including the left context, the phenotype itself, and the right context, which were then encoded into a 3×768 vector with a pretrained language model named BERT (bidirectional encoder representations from transformers) [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>]. Each BiLSTM-based model was developed by using the Keras package (version 2.3.1) [<xref ref-type="bibr" rid="ref35">35</xref>], and the BERT encoding process was performed by using the bert-as-service package (version 1.10.0) [<xref ref-type="bibr" rid="ref36">36</xref>]. Considering the very imbalanced distributions of attribute values in our dataset (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), we used the synthetic minority oversampling technique [<xref ref-type="bibr" rid="ref37">37</xref>] from the imbalanced-learn package (version 0.7.0) [<xref ref-type="bibr" rid="ref38">38</xref>] to balance the sample distributions. The hyperparameters of the constructed BiLSTM-based models were optimized using an early stopping strategy [<xref ref-type="bibr" rid="ref39">39</xref>] on the training set.</p>
      </sec>
      <sec>
        <title>Evaluation of the Performance for Recognizing PhenoSSU</title>
        <p>To evaluate the performance of the proposed algorithm in extracting PhenoSSU models from free text, we used the evaluation metrics from SemEval-2015 Task 14: Analysis of Clinical Text [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <p>The evaluation metric for the subtask of phenotype concept recognition was the F1-score. A predicted phenotype concept was regarded as a true positive if its text span overlapped with a gold standard text span. The precision metric was calculated as the fraction of correctly predicted phenotypes among all phenotypes identified by MetaMap, and the recall metric was calculated as the fraction of correctly predicted phenotypes among all phenotypes identified by the annotators. The F1-score was calculated as the harmonic mean of precision and recall.</p>
        <p>We chose the average weighted accuracy as the evaluation metric for the subtask of attribute value prediction because the distributions of different attribute values were very imbalanced. The average weighted accuracy metric considers the prevalence of an attribute value in the corpus, so it can measure how good an algorithm is at predicting the rare values of an attribute. The detailed calculating process of the average weighted accuracy can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>Evaluation of the Expressive Power of PhenoSSU</title>
        <p>Since the aim of this work was to develop a semantic information model that was more suitable than current approaches for representing phenotype knowledge in clinical guidelines, it was necessary to evaluate whether the annotated PhenoSSU model could capture the full semantics underlying the original descriptions of phenotypes. For example, in <xref rid="figure2" ref-type="fig">Figure 2</xref>B, the description “common symptoms include sudden onset of fever” could be perfectly represented by the PhenoSSU model (phenotype: fever; assertion: possible; frequency: frequent; temporal pattern: acute). By comparison, the description “abscesses grow larger as disease progress, often over months” was only partially represented by the PhenoSSU model (phenotype: abscess; assertion: present), which missed the information regarding the course and duration of abscess associated with the description.</p>
        <p>To evaluate the expressive power of PhenoSSU, we introduced a virtual attribute named “equal to the original description” into the PhenoSSU model. If the annotated PhenoSSU did not capture the full semantics of an original description, we set the value of this attribute to “partial.” Two annotators (TY and SL) independently evaluated the expressive power of the annotated PhenoSSU model. The initial interannotator agreement as measured with Cohen kappa statistic was 0.903 (3631/4020). We reached a consensus for those inconsistent judgments by an adjudication process (TJ).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview of the PhenoSSU Model and PhenoSSU-Based Knowledge Graphs</title>
        <p>To characterize the details of phenotypes for clinical guidelines, a semantic information model named PhenoSSU was proposed. With the introduction of 12 attributes associated with various phenotypes, the obtained knowledge graphs based on PhenoSSU were more fine-grained than those based on phenotype concepts. In this work, 193 PhenoSSU-based knowledge graphs for infectious diseases were constructed. At the concept level, we annotated 4020 phenotypic terms, 3962 of which could be normalized with 1508 concepts in SNOMED-CT. At the attribute level, we annotated 5278 nondefault attribute values (“present” was the default attribute value for the assertion attribute, and “none” was the default attribute value for other attributes), which indicated the widespread presence of contextual properties for phenotypes in clinical guidelines. The most commonly used attributes included assertion, frequency in a population, age specificity, phenotype severity, and temporal pattern (<xref rid="figure2" ref-type="fig">Figure 2</xref>A).</p>
        <p>Since the knowledge graphs in WikiData were also extracted from Wikipedia, we compared our knowledge graphs with those in WikiData at the concept level. WikiData built knowledge graphs for 66 of the 193 diseases, and these graphs included 354 phenotype concepts. Our annotations covered 297 of the 354 (83.9%) phenotypes from WikiData. For the uncovered phenotypes, we could not confirm their existence on the corresponding webpages of Wikipedia (including current and historical webpages). Most of these uncovered phenotypes may come from the manual additions of volunteers, who made use of sources other than Wikipedia (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Expressive Power of PhenoSSU for Representing Phenotype Knowledge</title>
        <p>To evaluate the expressive power of the PhenoSSU model quantitatively, we manually analyzed whether a PhenoSSU instance could capture the full semantics underlying the corresponding descriptions of phenotypes (<xref rid="figure2" ref-type="fig">Figure 2</xref>B).</p>
        <p>In this study, we annotated 4020 PhenoSSU instances, 3757 of which (89.5%) were determined to precisely represent the original phenotype knowledge described in natural language (<xref rid="figure2" ref-type="fig">Figure 2</xref>C). If we only considered the presence and absence of phenotype concepts (concept-based representation), the percentage of precise representations decreased to 20.3% (853/4200). This result further suggested the necessity of introducing the attributes associated with phenotypes into the developed model. We also analyzed the expressive power of the CEM and FHIR models for phenotypes and found that their percentages of precise representations were 48.4% (2034/4200) and 21.8% (914/4200), respectively. Most of the attributes defined in the CEM and FHIR models were not used in clinical guidelines except for the severity and laterality of phenotypes. The CEM model achieved a higher expressive power than that of the FHIR model because it considered the uncertainty of phenotypes (assertion: possible), which is a frequently used attribute in clinical guidelines. Please see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for detailed comparisons between the attributes used in the PhenoSSU, CEM, and FHIR models.</p>
      </sec>
      <sec>
        <title>Potential for Increasing the Speed of PhenoSSU Model Annotation With Machine Learning</title>
        <p>With the introduction of attributes, it would take more time to annotate a PhenoSSU model than to annotate phenotype concepts. To increase the efficiency of annotating PhenoSSU models, we developed a hybrid strategy that first recognized phenotype concepts with the MetaMap tool and then predicted the attribute values of phenotypes with SVM-based or BiLSTM-based classifiers (<xref rid="figure3" ref-type="fig">Figure 3</xref>). For the subtask of phenotype concept recognition, the MetaMap tool achieved an F1-score of 0.732 (precision 0.660; recall 0.824), which was comparable to its performance on other medical corpora [<xref ref-type="bibr" rid="ref40">40</xref>]. For the subtask of attribute value prediction, the average weighted accuracy of the SVM-based method (0.776) was better than that of the BiLSTM-based model (0.691). This may be due to the limited amount of training data, which made it hard for the deep learning-based approach to learn useful features from contexts. However, the performance of the BiLSTM-based model was still higher than the performance of a reference model (0.542) that always selected default values for attributes (it selected “present” for the assertion attribute and “none” for other attributes). These results indicate that machine learning methods have the potential to speed up PhenoSSU annotations. The detailed performances of the compared models for predicting the values of different attributes are listed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Automatic recognition of PhenoSSU.</p>
          </caption>
          <graphic xlink:href="jmir_v23i6e26892_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this work, we designed a fine-grained information model named PhenoSSU, which can precisely represent phenotype knowledge for clinical guidelines. We also developed an automatic strategy to extract PhenoSSU models from clinical guidelines and found that machine learning could be used to improve the efficiency of PhenoSSU annotation. Taken together, our work will provide a useful theoretical and technical guide for the construction of fine-grained phenotype knowledge graphs.</p>
        <p>From the design of PhenoSSU, it can be seen that PhenoSSU was derived from SNOMED-CT because both the phenotype concepts and attribute values in PhenoSSU came from SNOMED-CT. PhenoSSU strengthened the expressive power of SNOMED-CT by combining 12 attributes with phenotype concepts. In SNOMED-CT, there was a technique named postcoordination expression [<xref ref-type="bibr" rid="ref24">24</xref>] that could also capture the details of phenotypes by using combinations of existing concepts. For example, the out-of-vocabulary concept “severe headache, unilateral” can be expressed as a postcoordination of 3 concepts—headache (25064002): severity (272141005) = severe (24484000) and laterality (272741003) = unilateral (66459002). Compared with the postcoordination expression technique, PhenoSSU is a predefined information model that provides a general framework for knowledge representation. It is more convenient to configure the PhenoSSU model into the BRAT annotation tool to construct fine-grained phenotype knowledge graphs than to use the competing approach.</p>
        <p>In recent years, machine learning, especially deep learning, has been widely used for processing medical information [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref44">44</xref>]. In this work, we also explored the potential of automatically constructing fine-grained phenotype knowledge graphs based on machine learning. The results in <xref rid="figure3" ref-type="fig">Figure 3</xref> suggest that machine learning can assist with the human annotations of PhenoSSU to some extent. However, there are still great challenges to overcome to improve the performance of machine learning, especially the insufficiency and imbalanced distributions of training data. In future work, an active learning framework [<xref ref-type="bibr" rid="ref45">45</xref>] that incorporates both human intelligence and machine intelligence may be a better strategy for constructing fine-grained knowledge graphs.</p>
        <p>The improvement of knowledge granularity for disease phenotypes may potentially benefit knowledge-based diagnosis systems because the differential diagnostic capability of a PhenoSSU model is theoretically stronger than that of a single phenotype concept. From the perspective of coarse-grained knowledge graphs, some diseases (eg, the flu and common cold) have many similar symptoms (eg, fever and cough); however, these similar symptoms may have obvious differences from the perspectives of fine-grained knowledge graphs. For example, fever may be present in both flu and common cold. However, fever is more common in flu patients and usually appears suddenly with a body temperature of 38 degrees or above. By comparison, fever is rarely seen in common cold cases and usually appears gradually. Therefore, a diagnosis system cannot exclude the common cold if a patient has fever; however, it can safely exclude the common cold if a patient has a PhenoSSU instance such as “phenotype: fever; temporal pattern: acute; severity: severe.” PhenoSSU-based knowledge graphs should be very suitable for dialogue-based symptom checkers such as babylon [<xref ref-type="bibr" rid="ref46">46</xref>] and symptoma [<xref ref-type="bibr" rid="ref47">47</xref>], which collect the symptoms of a patient one by one. Considering the details of phenotypes in inquiry processing may potentially improve the efficiency and accuracy of dialogue-based symptom checkers.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>One limitation of this work is that we only considered the corpus of infectious diseases during the modeling process of PhenoSSU. In addition, we only considered attributes with categorical values and did not consider attributes with numeric values. Another limitation of this study is that we only tested the effectiveness of the PhenoSSU model for 193 infectious diseases, which is a small number considering that thousands of other diseases exist. In addition, attributes suitable for infectious diseases may not be suitable for other types of diseases. We will address these limitations during the process of constructing PhenoSSU-based knowledge graphs for more diseases in future work.</p>
        <p>The annotation guidelines for PhenoSSU and the PhenoSSU-based knowledge graphs for 193 infectious diseases can be found by visiting our website [<xref ref-type="bibr" rid="ref48">48</xref>]. The scripts for modeling and extracting PhenoSSU can be found on GitHub [<xref ref-type="bibr" rid="ref49">49</xref>].</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>PhenoSSU is a fine-grained semantic information model that can precisely represent phenotype knowledge in clinical guidelines, and machine learning can be used to improve the efficiency of constructing PhenoSSU-based knowledge graphs.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary figures, tables and texts.</p>
        <media xlink:href="jmir_v23i6e26892_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 933 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representation from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BiLSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BRAT</term>
          <def>
            <p>brat rapid annotation tool</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CEM</term>
          <def>
            <p>clinical element model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">FHIR</term>
          <def>
            <p>fast health care interoperability resource</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PhenoSSU</term>
          <def>
            <p>semantic structured unit of phenotype</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SNOMED-CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine–Clinical Terms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by grants 32070678 and 31671371 from the National Natural Science Foundation of China, grant EKPG21-12 from the Emergency Key Program of Guangzhou Laboratory, and grants 2016-I2M-1-005 and 2020-I2M-2-003 from the Chinese Academy of Medical Sciences Initiative for Innovative Medicine. We sincerely thank colleagues in our lab and experts in the biomedical field for their thoughtful suggestions to improve this work.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brookes</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Human genotype-phenotype databases: aims, challenges and opportunities</article-title>
          <source>Nat Rev Genet</source>
          <year>2015</year>
          <month>12</month>
          <volume>16</volume>
          <issue>12</issue>
          <fpage>702</fpage>
          <lpage>715</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3932</pub-id>
          <pub-id pub-id-type="medline">26553330</pub-id>
          <pub-id pub-id-type="pii">nrg3932</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oellrich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Collier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Groza</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rebholz-Schuhmann</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Boland</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Georgiev</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Livingston</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Luna</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mallon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Manda</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>PN</given-names>
            </name>
            <name name-style="western">
              <surname>Rustici</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Winnenburg</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dumontier</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The digital revolution in phenotyping</article-title>
          <source>Brief Bioinform</source>
          <year>2016</year>
          <month>09</month>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>819</fpage>
          <lpage>830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26420780"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbv083</pub-id>
          <pub-id pub-id-type="medline">26420780</pub-id>
          <pub-id pub-id-type="pii">bbv083</pub-id>
          <pub-id pub-id-type="pmcid">PMC5036847</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Turki</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shafee</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hadj Taieb</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Ben Aouicha</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vrandečić</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hamdi</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Wikidata: a large-scale collaborative ontological medical database</article-title>
          <source>J Biomed Inform</source>
          <year>2019</year>
          <month>11</month>
          <volume>99</volume>
          <fpage>103292</fpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2019.103292</pub-id>
          <pub-id pub-id-type="medline">31557529</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(19)30211-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rappaport</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Twik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Plaschkes</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nudel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Iny Stein</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Levitt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gershoni</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Morrey</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Safran</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lancet</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>MalaCards: an amalgamated human disease compendium with diverse clinical and genetic annotation and structured search</article-title>
          <source>Nucleic Acids Res</source>
          <year>2017</year>
          <month>01</month>
          <day>04</day>
          <volume>45</volume>
          <issue>D1</issue>
          <fpage>D877</fpage>
          <lpage>D887</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27899610"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkw1012</pub-id>
          <pub-id pub-id-type="medline">27899610</pub-id>
          <pub-id pub-id-type="pii">gkw1012</pub-id>
          <pub-id pub-id-type="pmcid">PMC5210521</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Isele</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jakob</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jentzsch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kontokostas</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mendes</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>DBpedia–a large-scale, multilingual knowledge base extracted from Wikipedia</article-title>
          <source>Semantic web</source>
          <year>2015</year>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>167</fpage>
          <lpage>169</lpage>
          <pub-id pub-id-type="doi">10.3233/sw-140134</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haendel</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Classification, ontology, and precision medicine</article-title>
          <source>N Engl J Med</source>
          <year>2018</year>
          <month>12</month>
          <day>11</day>
          <volume>379</volume>
          <issue>15</issue>
          <fpage>1452</fpage>
          <lpage>1462</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMra1615014</pub-id>
          <pub-id pub-id-type="medline">30304648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harkema</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Thornblade</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
          </person-group>
          <article-title>ConText: an algorithm for determining negation, experiencer, and temporal status from clinical reports</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>10</month>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>839</fpage>
          <lpage>851</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(09)00074-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2009.05.002</pub-id>
          <pub-id pub-id-type="medline">19435614</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(09)00074-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC2757457</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Albers</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>High-fidelity phenotyping: richness and freedom from bias</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>03</month>
          <day>01</day>
          <volume>25</volume>
          <issue>3</issue>
          <fpage>289</fpage>
          <lpage>294</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29040596"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx110</pub-id>
          <pub-id pub-id-type="medline">29040596</pub-id>
          <pub-id pub-id-type="pii">4484121</pub-id>
          <pub-id pub-id-type="pmcid">PMC7282504</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Deep phenotyping for precision medicine</article-title>
          <source>Hum Mutat</source>
          <year>2012</year>
          <month>05</month>
          <volume>33</volume>
          <issue>5</issue>
          <fpage>777</fpage>
          <lpage>780</lpage>
          <pub-id pub-id-type="doi">10.1002/humu.22080</pub-id>
          <pub-id pub-id-type="medline">22504886</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Delude</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Deep phenotyping: the details of disease</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>11</month>
          <day>04</day>
          <volume>527</volume>
          <issue>7576</issue>
          <fpage>S14</fpage>
          <lpage>S15</lpage>
          <pub-id pub-id-type="doi">10.1038/527s14a</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical information extraction applications: a literature review</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>01</month>
          <volume>77</volume>
          <fpage>34</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30256-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.11.011</pub-id>
          <pub-id pub-id-type="medline">29162496</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30256-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC5771858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oniki</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Beebe</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Coyle</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Parker</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Solbrig</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Marchant</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kaggal</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Huff</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Clinical element models in the SHARPn consortium</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <month>03</month>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>248</fpage>
          <lpage>256</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26568604"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv134</pub-id>
          <pub-id pub-id-type="medline">26568604</pub-id>
          <pub-id pub-id-type="pii">ocv134</pub-id>
          <pub-id pub-id-type="pmcid">PMC6283078</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bender</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sartipi</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>HL7 FHIR: an agile and RESTful approach to healthcare information exchange</article-title>
          <year>2013</year>
          <conf-name>Proc 26th IEEE Int Symp Computer-Based Med Syst</conf-name>
          <conf-date>2013</conf-date>
          <conf-loc>Porto</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cbms.2013.6627810</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <source>Clinical Quality Language</source>
          <access-date>2021-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cql.hl7.org/">https://cql.hl7.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <source>Wikipedia list of infectious diseases</source>
          <access-date>2020-09-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://en.wikipedia.org/wiki/List_of_infectious_diseases">https://en.wikipedia.org/wiki/List_of_infectious_diseases</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajagopalan</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Khanna</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Leiter</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Stott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Showalter</surname>
              <given-names>TN</given-names>
            </name>
            <name name-style="western">
              <surname>Dicker</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Lawrence</surname>
              <given-names>YR</given-names>
            </name>
          </person-group>
          <article-title>Patient-oriented cancer information on the internet: a comparison of Wikipedia and a professionally maintained database</article-title>
          <source>J Oncol Pract</source>
          <year>2011</year>
          <month>09</month>
          <volume>7</volume>
          <issue>5</issue>
          <fpage>319</fpage>
          <lpage>323</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22211130"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/JOP.2010.000209</pub-id>
          <pub-id pub-id-type="medline">22211130</pub-id>
          <pub-id pub-id-type="pii">3714338</pub-id>
          <pub-id pub-id-type="pmcid">PMC3170066</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reavley</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mackinnon</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Alvarez-Jimenez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hetrick</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Killackey</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Purcell</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yap</surname>
              <given-names>MBH</given-names>
            </name>
            <name name-style="western">
              <surname>Jorm</surname>
              <given-names>AF</given-names>
            </name>
          </person-group>
          <article-title>Quality of information sources about mental disorders: a comparison of Wikipedia with centrally controlled web and printed sources</article-title>
          <source>Psychol Med</source>
          <year>2012</year>
          <month>08</month>
          <volume>42</volume>
          <issue>8</issue>
          <fpage>1753</fpage>
          <lpage>1762</lpage>
          <pub-id pub-id-type="doi">10.1017/S003329171100287X</pub-id>
          <pub-id pub-id-type="medline">22166182</pub-id>
          <pub-id pub-id-type="pii">S003329171100287X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Donnelly</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>SNOMED-CT: the advanced terminology and coding system for eHealth</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2006</year>
          <volume>121</volume>
          <fpage>279</fpage>
          <lpage>290</lpage>
          <pub-id pub-id-type="medline">17095826</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gaudet-Blavignac</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Foufi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Bjelogrlic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lovis</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Use of the Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT) for processing free text in health care: systematic scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>01</month>
          <day>26</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e24594</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e24594/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24594</pub-id>
          <pub-id pub-id-type="medline">33496673</pub-id>
          <pub-id pub-id-type="pii">v23i1e24594</pub-id>
          <pub-id pub-id-type="pmcid">PMC7872838</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stenetorp</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pyysalo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Topić</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ohta</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>BRAT: a web-based tool for NLP-assisted text annotation</article-title>
          <year>2012</year>
          <conf-name>The 13th Conference of the European Chapter of the Association for Computational Linguistics</conf-name>
          <conf-date>April 23-27, 2012</conf-date>
          <conf-loc>Avignon</conf-loc>
          <fpage>102</fpage>
          <lpage>107</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/E12-2021.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Lang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>An overview of MetaMap: historical perspective and recent advances</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>229</fpage>
          <lpage>236</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/lookup/pmidlookup?view=long&amp;pmid=20442139"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.002733</pub-id>
          <pub-id pub-id-type="medline">20442139</pub-id>
          <pub-id pub-id-type="pii">17/3/229</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Replace or retrieve keywords in documents at scale</article-title>
          <source>ArXiv.</source>
          <comment>Preprint posted online on November 9, 2017
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1711.00046"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McHugh</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Interrater reliability: the kappa statistic</article-title>
          <source>Biochem Med (Zagreb)</source>
          <year>2012</year>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>276</fpage>
          <lpage>282</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.biochemia-medica.com/2012/22/276"/>
          </comment>
          <pub-id pub-id-type="medline">23092060</pub-id>
          <pub-id pub-id-type="pmcid">PMC3900052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dhombres</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Winnenburg</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Case</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Extending the coverage of phenotypes in SNOMED CT through post-coordination</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2015</year>
          <volume>216</volume>
          <fpage>795</fpage>
          <pub-id pub-id-type="doi">10.1007/978-1-84882-803-2_12</pub-id>
          <pub-id pub-id-type="medline">26262161</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>Database issue</issue>
          <fpage>D267</fpage>
          <lpage>D270</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://nar.oxfordjournals.org/cgi/pmidlookup?view=long&amp;pmid=14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>DuVall</surname>
              <given-names>SL</given-names>
            </name>
          </person-group>
          <article-title>2010 i2b2/VA challenge on concepts, assertions, and relations in clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>552</fpage>
          <lpage>556</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21685143"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000203</pub-id>
          <pub-id pub-id-type="medline">21685143</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000203</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gorman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Manandhar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>SemEval-2015 task 14: analysis of clinical text</article-title>
          <year>2015</year>
          <conf-name>The 9th International Workshop on Semantic Evaluation (SemEval 2015)</conf-name>
          <conf-date>June 4-5, 2015</conf-date>
          <conf-loc>Denver</conf-loc>
          <fpage>303</fpage>
          <lpage>310</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/s15-2051</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Machine Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <pub-id pub-id-type="doi">10.5555/1953048.2078195</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <source>Tuning the hyper-parameters of an estimator</source>
          <access-date>2020-10-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scikit-learn.org/stable/modules/grid_search.html">https://scikit-learn.org/stable/modules/grid_search.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>ZC</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Pang</surname>
              <given-names>YL</given-names>
            </name>
          </person-group>
          <article-title>Medical assertion classification in Chinese EMRs using attention enhanced neural network</article-title>
          <source>Math Biosci Eng</source>
          <year>2019</year>
          <month>03</month>
          <day>08</day>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>1966</fpage>
          <lpage>1977</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aimspress.com/article/10.3934/mbe.2019096"/>
          </comment>
          <pub-id pub-id-type="doi">10.3934/mbe.2019096</pub-id>
          <pub-id pub-id-type="medline">31137195</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kocaman</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Talby</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Improving clinical document understanding on COVID-19 research with Spark NLP</article-title>
          <source>ArXiv.</source>
          <comment>Preprint posted online on December 7, 2020
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2012.04005"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>ArXiv.</source>
          <comment>Preprint posted online on May 24, 2019
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1810.04805"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Starren</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Segment convolutional neural networks (Seg-CNNs) for classifying relations in clinical notes</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>01</month>
          <day>01</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>93</fpage>
          <lpage>98</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29025149"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx090</pub-id>
          <pub-id pub-id-type="medline">29025149</pub-id>
          <pub-id pub-id-type="pii">4101223</pub-id>
          <pub-id pub-id-type="pmcid">PMC6381760</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Attention-based bidirectional long short-term memory networks for relation classification</article-title>
          <year>2016</year>
          <conf-name>Proc 54th Annu Mtg Assoc Comp Linguist</conf-name>
          <conf-date>2016</conf-date>
          <conf-loc>Berlin</conf-loc>
          <fpage>207</fpage>
          <lpage>212</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/p16-2034</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gulli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pal</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>Deep Learning with Keras</source>
          <year>2017</year>
          <publisher-loc>Birmingham</publisher-loc>
          <publisher-name>Packt Publishing Ltd</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>bert-as-service</source>
          <access-date>2020-09-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/hanxiao/bert-as-service">https://github.com/hanxiao/bert-as-service</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>WP</given-names>
            </name>
          </person-group>
          <article-title>SMOTE: synthetic minority over-sampling technique</article-title>
          <source>J Artif Intell Res</source>
          <year>2002</year>
          <month>06</month>
          <day>01</day>
          <volume>16</volume>
          <fpage>321</fpage>
          <lpage>357</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lemaître</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Aridas</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Imbalanced-learn: a Python toolbox to tackle the curse of imbalanced datasets in machine learning</article-title>
          <source>J Mach Learn Res</source>
          <year>2017</year>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>559</fpage>
          <lpage>563</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jmlr.org/papers/volume18/16-365/16-365.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Deep Learning</source>
          <year>2016</year>
          <publisher-loc>Cambridge</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
          </person-group>
          <article-title>MetaMap Lite: an evaluation of a new Java implementation of MetaMap</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>07</month>
          <day>01</day>
          <volume>24</volume>
          <issue>4</issue>
          <fpage>841</fpage>
          <lpage>844</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28130331"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw177</pub-id>
          <pub-id pub-id-type="medline">28130331</pub-id>
          <pub-id pub-id-type="pii">ocw177</pub-id>
          <pub-id pub-id-type="pmcid">PMC6080672</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Datta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Si</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Soni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Deep learning in clinical natural language processing: a methodical review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>03</month>
          <day>01</day>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>457</fpage>
          <lpage>470</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz200</pub-id>
          <pub-id pub-id-type="medline">31794016</pub-id>
          <pub-id pub-id-type="pii">5651084</pub-id>
          <pub-id pub-id-type="pmcid">PMC7025365</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Systematic evaluation of research progress on natural language processing in medicine over the past 20 years: bibliometric study on PubMed</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>01</month>
          <day>23</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>e16816</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/1/e16816/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16816</pub-id>
          <pub-id pub-id-type="medline">32012074</pub-id>
          <pub-id pub-id-type="pii">v22i1e16816</pub-id>
          <pub-id pub-id-type="pmcid">PMC7005695</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Marrying medical domain knowledge with deep learning on electronic health records: a deep visual analytics approach</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>09</month>
          <day>28</day>
          <volume>22</volume>
          <issue>9</issue>
          <fpage>e20645</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/9/e20645/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20645</pub-id>
          <pub-id pub-id-type="medline">32985996</pub-id>
          <pub-id pub-id-type="pii">v22i9e20645</pub-id>
          <pub-id pub-id-type="pmcid">PMC7551124</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Use of BERT (Bidirectional Encoder Representations from Transformers)-based deep learning method for extracting evidences in Chinese radiology reports: development of a computer-aided liver cancer diagnosis framework</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>01</month>
          <day>12</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e19689</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e19689/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19689</pub-id>
          <pub-id pub-id-type="medline">33433395</pub-id>
          <pub-id pub-id-type="pii">v23i1e19689</pub-id>
          <pub-id pub-id-type="pmcid">PMC7837998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hinz</surname>
              <given-names>ERM</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Eyler</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Applying active learning to high-throughput phenotyping algorithms for electronic health records data</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <month>12</month>
          <volume>20</volume>
          <issue>e2</issue>
          <fpage>e253</fpage>
          <lpage>e259</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23851443"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001945</pub-id>
          <pub-id pub-id-type="medline">23851443</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001945</pub-id>
          <pub-id pub-id-type="pmcid">PMC3861916</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ćirković</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of four artificial intelligence-assisted self-diagnosis apps on three diagnoses: two-year follow-up study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>12</month>
          <day>04</day>
          <volume>22</volume>
          <issue>12</issue>
          <fpage>e18097</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/12/e18097/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18097</pub-id>
          <pub-id pub-id-type="medline">33275113</pub-id>
          <pub-id pub-id-type="pii">v22i12e18097</pub-id>
          <pub-id pub-id-type="pmcid">PMC7748958</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Munsch</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gruarin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nateqi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Abdarahmane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Weingartner-Ortner</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Knapp</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Diagnostic accuracy of web-based COVID-19 symptom checkers: comparison study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>06</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e21299</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e21299/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21299</pub-id>
          <pub-id pub-id-type="medline">33001828</pub-id>
          <pub-id pub-id-type="pii">v22i10e21299</pub-id>
          <pub-id pub-id-type="pmcid">PMC7541039</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <source>Database of PhenoSSU</source>
          <access-date>2021-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jianglab.tech/PhenoSSU/">https://www.jianglab.tech/PhenoSSU/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <source>Scripts for PhenoSSU</source>
          <access-date>2021-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/denglizong/scripts-for-PhenoSSU">https://github.com/denglizong/scripts-for-PhenoSSU</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
