<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v21i5e13504</article-id>
      <article-id pub-id-type="pmid">31140433</article-id>
      <article-id pub-id-type="doi">10.2196/13504</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Discovering Clinical Information Models Online to Promote Interoperability of Electronic Health Records: A Feasibility Study of OpenEHR</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lu</surname>
            <given-names>Xudong</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Moner</surname>
            <given-names>David</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Tognola</surname>
            <given-names>Gabriella</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lee</surname>
            <given-names>Jaehoon</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Correia</surname>
            <given-names>Ricardo</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Goossen</surname>
            <given-names>William</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="author" id="contrib1">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Lin</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9211-0079</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib2">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Xiaoshuo</given-names>
          </name>
          <degrees>BEng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1197-5770</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib3" corresp="yes">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Jiao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Institute of Medical Information / Medical Library</institution>
            <institution>Chinese Academy of Medical Sciences &amp; Peking Union Medical College</institution>
            <addr-line>No 3 Yabao Road, Chaoyang District</addr-line>
            <addr-line>Beijing, 100020</addr-line>
            <country>China</country>
            <phone>86 18618461596</phone>
            <email>li.jiao@imicams.ac.cn</email>
          </address>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6391-8343</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
      <label>1</label>
      <institution>Institute of Medical Information / Medical Library</institution>
      <institution>Chinese Academy of Medical Sciences &amp; Peking Union Medical College</institution>  
      <addr-line>Beijing</addr-line>
      <country>China</country></aff>
      <author-notes>
        <corresp>Corresponding Author: Jiao Li 
        <email>li.jiao@imicams.ac.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection"><month>05</month><year>2019</year></pub-date>
      <pub-date pub-type="epub">
        <day>28</day>
        <month>05</month>
        <year>2019</year>
      </pub-date>
      <volume>21</volume>
      <issue>5</issue>
      <elocation-id>e13504</elocation-id>
      <!--history from ojs - api-xml-->
      <history>
        <date date-type="received">
          <day>27</day>
          <month>1</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>21</day>
          <month>2</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>18</day>
          <month>4</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>2</day>
          <month>5</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Lin Yang, Xiaoshuo Huang, Jiao Li. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 28.05.2019.</copyright-statement>
      <copyright-year>2019</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2019/5/e13504/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Clinical information models (CIMs) enabling semantic interoperability are crucial for electronic health record (EHR) data use and reuse. Dual model methodology, which distinguishes the CIMs from the technical domain, could help enable the interoperability of EHRs at the knowledge level. How to help clinicians and domain experts discover CIMs from an open repository online to represent EHR data in a standard manner becomes important.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to develop a retrieval method to identify CIMs online to represent EHR data.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We proposed a graphical retrieval method and validated its feasibility using an online CIM repository: openEHR Clinical Knowledge Manager (CKM). First, we represented CIMs (archetypes) using an extended Bayesian network. Then, an inference process was run in the network to discover relevant archetypes. In the evaluation, we defined three retrieval tasks (medication, laboratory test, and diagnosis) and compared our method with three typical retrieval methods (BM25F, simple Bayesian network, and CKM), using mean average precision (MAP), average precision (AP), and precision at 10 (P@10) as evaluation metrics.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We downloaded all available archetypes from the CKM. Then, the graphical model was applied to represent the archetypes as a four-level clinical resources network. The network consisted of 5513 nodes, including 3982 data element nodes, 504 concept nodes, 504 duplicated concept nodes, and 523 archetype nodes, as well as 9867 edges. The results showed that our method achieved the best MAP (MAP=0.32), and the AP was almost equal across different retrieval tasks (AP=0.35, 0.31, and 0.30, respectively). In the diagnosis retrieval task, our method could successfully identify the models covering “diagnostic reports,” “problem list,” “patients background,” “clinical decision,” etc, as well as models that other retrieval methods could not find, such as “problems and diagnoses.”</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The graphical retrieval method we propose is an effective approach to meet the uncertainty of finding CIMs. Our method can help clinicians and domain experts identify CIMs to represent EHR data in a standard manner, enabling EHR data to be exchangeable and interoperable.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>openEHR</kwd>
        <kwd>clinical information model</kwd>
        <kwd>health information interoperability</kwd>
        <kwd>information retrieval</kwd>
        <kwd>probabilistic graphical model</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Electronic health record (EHR) data can be used and reused for many purposes, including managing an individual patient’s care, medical and health services research, and management of health care facilities. More recently, EHR data has been defined as a part of real-world data [<xref ref-type="bibr" rid="ref1">1</xref>] and is increasingly seen as a viable source of data for regulatory decisions [<xref ref-type="bibr" rid="ref2">2</xref>]. However, bias can occur in different steps of the data chain, which might lead to incomparable or invalid analysis results [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      <p>Semantic interoperability is essential for accurate and advanced health-related computing, shared EHRs, and coordination of clinical care across clinical systems [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. According to ISO/TS 18308 (a standard published by the International Organization for Standardization defining the set of requirements for EHR architecture), it is the ability for data shared by systems to be understood at the level of fully defined domain concepts [<xref ref-type="bibr" rid="ref6">6</xref>]. To achieve this, a two-level clinical modeling methodology is proposed to separate clinical knowledge from information models [<xref ref-type="bibr" rid="ref7">7</xref>]. It distinguishes two models: the reference model (RM), which contains the basic and stable properties of health record information, and the clinical information model (CIM), which formally defines clinical concepts (or domain content models) in a standardized and reusable manner, such as blood pressure [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. In this scenario, CIMs in agreement at an organizational, regional, national, or international level will provide a firm basis for establishing semantic interoperability [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>This two-level modeling approach is used in the ISO/CEN EN13606 (a standard designed to achieve semantic interoperability in EHR communication) [<xref ref-type="bibr" rid="ref10">10</xref>] and openEHR (described subsequently) [<xref ref-type="bibr" rid="ref11">11</xref>], as well as Health Level Seven (HL7) version 3 Clinical Document Architecture (HL7's primary standard for representing structured clinical documentation on patients) and Care Provision messages (information structures used to communicate information between providers of care) [<xref ref-type="bibr" rid="ref12">12</xref>]. For openEHR and ISO/CEN EN13606, CIMs are defined in the form of archetypes, whereas those of HL7 are in the form of HL7 templates. According to the systematic review done by Moreno-Conde et al [<xref ref-type="bibr" rid="ref13">13</xref>], archetypes are the preferred type of technical artifacts, and openEHR is most frequently mentioned. Therefore, CIMs in our study specifically refer to openEHR archetypes.</p>
      <p>OpenEHR is an open-source EHR standard ensuring universal interoperability among all forms of electronic data [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. It is well known for its two-level design paradigm, consisting of an RM, archetypes, and templates. <italic>Archetypes</italic> are computable clinical content specifications that formalize the patterns and requirements for the representation of health information content [<xref ref-type="bibr" rid="ref9">9</xref>]. To achieve common, coherent, and clinician-approved archetypes, the openEHR community provides a Web-based controlled authoring environment for a wide range of domain experts, especially clinicians, to participate in the creation of archetypes. All contributions are open access and freely available under a Creative Commons license. Archetypes are general purpose, reusable, and composable; therefore, searching for reusable archetypes from archetype repositories is essential throughout the development process [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. Documents with complete archetype design specifications are the input; lists of existing reusable archetypes, either complete or needing modifications, and new archetypes to be developed from scratch are the output [<xref ref-type="bibr" rid="ref23">23</xref>]. The crucial problem is how to find the relevant ones from open repositories to help identify reusable archetypes.</p>
      <p>The openEHR community provides the Clinical Knowledge Manager (CKM) [<xref ref-type="bibr" rid="ref24">24</xref>] as a library of openEHR archetypes. It supports their retrieval based on clinical concepts in different sections of archetypes. When the end user enters a term, the CKM will return the archetype that contains the word in the metadata, definition, or ontology section. This could help find reusable archetypes [<xref ref-type="bibr" rid="ref25">25</xref>]. However, domain experts are mainly concerned about whether the concept name and core data items are covered [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>], and they, especially clinicians, may not be familiar with openEHR archetypes. For better results, end users usually need to do a large amount of preparatory work, which may include classifying and rearranging data [<xref ref-type="bibr" rid="ref27">27</xref>], abstracting clinical concepts from data schemas [<xref ref-type="bibr" rid="ref17">17</xref>], and identifying archetype-friendly concepts from clinical statements [<xref ref-type="bibr" rid="ref26">26</xref>]. This is an iterative and time-consuming process.</p>
      <p>We aimed to develop a retrieval method to identify archetypes online to represent EHR data and optimize existing retrieval results of the CKM. Archetypes usually have their own hierarchical structures, and semantic relationships occur between different archetypes; therefore, we considered that the graphical representation of this potential knowledge might support the retrieval of CIMs. Previous studies show that graphs could efficiently represent clinical knowledge [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>], and the Bayesian network, as a probabilistic graphical model, is an effective methodology to meet the uncertainty of information needs. Rotmensch et al [<xref ref-type="bibr" rid="ref30">30</xref>] used a naive Bayes classifier and a Bayesian network to automatically construct a health knowledge graph from electronic medical records. However, in retrieval tasks, differences between Bayesian network-based information retrieval methods mainly lie in the structure of the network, and this structure depends on dependencies between the variables involved in the problem. The basic Bayesian network consists of two different sets of variables, a set of indexing terms and a set of documents in the collection, and the relationships between them [<xref ref-type="bibr" rid="ref31">31</xref>]. Related research has been conducted to extend a simple Bayesian network for better results. Some methods focus on the structure of the term subnetwork using a polytree [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>] or two term layers [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] to represent term relationships. Some focus on the structure of the document subnetwork using two document layers [<xref ref-type="bibr" rid="ref36">36</xref>] to represent document relationships. 
Compared with the previous studies, we focused on the probabilistic graphical representation of openEHR archetype sets, which depends on relationships between the variables involved in finding relevant archetypes, and how the inference process is carried out, aiming for better retrieval performance.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Information Need Analysis</title>
        <p>To find relevant archetypes from the open repository, we first had to understand which kinds of terms end users tended to enter. As archetype modeling methodology [<xref ref-type="bibr" rid="ref23">23</xref>] shows, domain experts identify core clinical concepts and related data elements involved in a particular scenario and organize them into mind maps or design tables. These archetype design specifications are the main source of search keywords. We considered that the input of end users was mainly the names of clinical concepts or related data elements.</p>
        <p>Ideally, the user enters the clinical concept and the system returns the archetype defining the concept, or the user enters data elements related to a concept and the system returns the archetype that covers all the data elements. However, it is difficult to distinguish clinical concepts from data elements in the end user’s input, unless the system forces users to enter them separately. More importantly, a data element defined by an end user may be a concept in the archetype repository, or a concept defined by an end user may be a data element of an archetype. If we match concepts and data elements separately, users may miss some important relevant archetypes.</p>
        <p>Based on these considerations, we tried to translate the problem into identifying potentially relevant clinical concepts from the input. We proposed reorganizing the archetype collection according to the dependencies between clinical concepts, data elements, and archetypes and using a probabilistic approach to handle the uncertainty of user information needs.</p>
      </sec>
      <sec>
        <title>Graphical Retrieval Method Based on an Extended Bayesian Network</title>
        <sec>
          <title>Archetype Feature Identification and Extraction</title>
          <p>Based on information need analysis, we attempted to use clinical concepts and data elements to represent each archetype. An archetype is expressed in Archetype Definition Language (ADL) and mainly consists of three sections (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The header contains a unique identifier for the archetype and includes some descriptive information, such as concept name and keywords; the definition contains the main formal definition of the archetype, including all possible data elements that could be relevant for the clinical concept; and the ontology contains the code that represents the meaning of nodes. We considered that clinical concepts were the topics of archetypes, whereas keywords and data elements explained the meaning of topics from different perspectives. Thus, we extracted archetype ID, concepts, keywords, and data elements based on ADL files parsing as features (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p>
          <p>There are also relationships between archetypes, including specialization and aggregation. An archetype is a specialization of another if it mentions that archetype as its parent and only makes changes to its definition. Aggregation enables any subset of archetypes to be stated as the allowed set for use in a compositional parent archetype. In general, archetypes tend to provide highly reusable models of real-world content with local constraining left to templates, which may result in matching as many archetypes as possible when defining archetype slots. For example, “openEHR-EHR-CLUSTER.device_details.v1” allows the inclusion of 199 archetypes. We thought that such cases might blur the semantic relationship between archetypes. In addition, version control is an integral part of the openEHR architecture. When an archetype is updated, the old version can no longer be found in the archetype library. Therefore, we added only the parent archetype ID as a feature (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p>
          <p>Furthermore, there are four main categories of archetypes, including COMPOSITION, SECTION, ENTRY, and CLUSTER, each defined as part of the openEHR RM. A COMPOSITION is a container class, whereas a SECTION is an organizing class, each containing ENTRY objects [<xref ref-type="bibr" rid="ref16">16</xref>]. The ENTRY class is further specialized into ADMIN_ENTRY, OBSERVATION, EVALUATION, INSTRUCTION, and ACTION subclasses, of which the latter four are kinds of CARE_ENTRY. CLUSTERS are reusable archetypes for use within any ENTRY or other CLUSTER. In addition, openEHR defines DEMOGRAPHIC archetypes for demographic information. Thus, archetypes can be mainly divided into COMPOSITION, SECTION, ENTRY, CLUSTER, and DEMOGRAPHIC. However, these archetype categories do not affect the clinical content, so we did not use them as a feature.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>An example of archetype feature identification and extraction.</p>
            </caption>
            <graphic xlink:href="jmir_v21i5e13504_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Clinical Resources Network Modeling</title>
          <p>We attempted to use a three-level Bayesian network to represent the dependencies among data elements, concepts, and archetypes (<xref ref-type="fig" rid="figure2">Figure 2</xref>). The first is the data element layer. It contains the set of indexing data elements T={<italic>T</italic><sub>i</sub>, i=1...M}, <italic>M</italic> being the number of data elements from a given archetype collection. Each data element node is linked to its corresponding concept node in the clinical concept layer. The second is the clinical concept layer. It contains the set of indexing concepts C={<italic>C</italic><sub>j</sub>, j=1...N}, <italic>N</italic> being the number of concepts. The third layer contains the set of archetypes A={<italic>A</italic><sub>k</sub>, k=1...K}, <italic>K</italic> being the total number of archetypes in the collection. There is a link joining a concept node <italic>C</italic><sub>j</sub> and an archetype node <italic>A</italic><sub>k</sub> whenever <italic>A</italic><sub>k</sub> defines <italic>C</italic><sub>j</sub> or <italic>A</italic><sub>k</sub> is a specialization of another archetype <italic>A</italic><sub>p</sub> that defines <italic>C</italic><sub>j</sub>.</p>
          <p>However, data elements are unevenly distributed across different types of archetypes, especially for container classes. When two archetypes have few data elements and the terms used are totally different, such as “openEHR-EHR-COMPOSITION.medication_list.v0” and “openEHR-EHR-SECTION.medication_order_list.v0,” it is difficult to find a correlation between them.</p>
          <p>Therefore, we tried to include relationships between concepts in the model to extend the similarity between archetypes. Relationships between concepts were measured by estimating conditional probabilities of relevance of every concept given that another concept was considered relevant [<xref ref-type="bibr" rid="ref36">36</xref>]. Let <italic>e</italic>(<italic>C</italic><sub>i</sub>) be an event representing some type of evidence about the relevance of a concept <italic>C</italic><sub>i</sub>. In openEHR, the evidence could be “keywords,” “purpose,” “use,” or other semantic information. In this case, we considered <italic>e</italic>(<italic>C</italic><sub>i</sub>) to be the event [<italic>KW</italic><sub>l</sub> = <italic>kw</italic><sub>l</sub>, ∀ <italic>KW</italic><sub>l</sub> ∈ <italic>C</italic><sub>i</sub>], <italic>KW</italic> being the keywords used to describe the concept. Given a concept <italic>C</italic><sub>j</sub>, we calculated the probabilities <italic>p</italic>(<italic>c</italic><sub>j</sub>|<italic>e</italic>(<italic>C</italic><sub>i</sub>)) ∀ <italic>C</italic><sub>i</sub> ∈ <italic>C</italic> using equation (a) in <xref ref-type="fig" rid="figure3">Figure 3</xref>, where the weight was computed by equation (d) in <xref ref-type="fig" rid="figure3">Figure 3</xref> and <italic>M</italic><sub>k</sub> was the number of keywords. After sorting the values of <italic>p</italic>(<italic>c</italic><sub>j</sub>|<italic>e</italic>(<italic>C</italic><sub>i</sub>)) in decreasing order, the top <italic>n</italic> concepts <italic>R</italic><sub>n</sub>(<italic>C</italic><sub>j</sub>) were the ones most closely related to <italic>C</italic><sub>j</sub>. Then, we included in the network explicit dependence relationships between <italic>C</italic><sub>j</sub> and each concept <italic>C</italic><sub>i</sub>∈<italic>R</italic><sub>n</sub>(<italic>C</italic><sub>j</sub>).</p>
          <p>To determine the topology of the Bayesian network, we used a concept subnetwork with two layers instead of the original concept layer. We duplicated each concept node <italic>C</italic><sub>j</sub> to obtain another concept node <italic>C</italic><sup>ʹ</sup><sub>j</sub>, thus forming a new concept layer, and the arcs connecting the two layers went from <italic>C</italic><sub>i</sub>∈<italic>R</italic><sub>n</sub>(<italic>C</italic><sub>j</sub>) to <italic>C</italic><sup>ʹ</sup><sub>j</sub>. Thus, this directed acyclic graph had the set of variables V=T∪C∪C<sup>ʹ</sup>∪A. The new topology avoids connections between nodes in the same layer and facilitates the inference process.</p>
          <p>The overall modeling procedure is summarized in <xref ref-type="fig" rid="figure4">Figure 4</xref>. First, we extracted archetype ID, clinical concept, and data elements from the ADL files (detailed in section “archetype feature identification and extraction”). Second, we learned the dependencies between concepts (detailed previously). Third, we graphically represented the dependencies between the variables.</p>
        </sec>
        <sec>
          <title>Parameters Estimation in the Clinical Resources Network</title>
          <p>In this section, we will discuss how to estimate the probability distributions of each node in the network.</p>
          <sec>
            <title>Data Element Nodes</title>
            <p>A data element node has no parents; therefore, we had to store the probability of relevance <italic>p</italic> (<italic>t</italic><sub>i</sub>) and the probability of being nonrelevant. We used the estimator (<xref ref-type="fig" rid="figure3">Figure 3</xref>, equation b), where <italic>M</italic> is the number of terms used to index the concept collection.</p>
            <fig id="figure2" position="float">
              <label>Figure 2</label>
              <caption>
                <p>Topology of three-level clinical resources network. A: archetype; C: clinical concept; T: data element.</p>
              </caption>
              <graphic xlink:href="jmir_v21i5e13504_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
            <fig id="figure3" position="float">
              <label>Figure 3</label>
              <caption>
                <p>Equations used in our method.</p>
              </caption>
              <graphic xlink:href="jmir_v21i5e13504_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
            <fig id="figure4" position="float">
              <label>Figure 4</label>
              <caption>
                <p>Clinical resources network modeling pipeline. A: archetype; C: clinical concept; Cʹ: duplicated clinical concept; T: data element.</p>
              </caption>
              <graphic xlink:href="jmir_v21i5e13504_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Concept Nodes</title>
            <p>For each concept node <italic>C</italic><sub>j</sub> in the concept subnetwork, we needed to estimate a set of conditional probability distributions <italic>p</italic>(<italic>c</italic><sub>j</sub>|<italic>pa</italic>(<italic>C</italic><sub>j</sub>)). <italic>Pa</italic>(<italic>C</italic><sub>j</sub>) represents the parent node set of concept <italic>C</italic><sub>j</sub>, containing all the data elements belonging to concept <italic>C</italic><sub>j</sub>, and <italic>pa</italic>(<italic>C</italic><sub>j</sub>) is a possible configuration of values associated with the parent set <italic>Pa</italic>(<italic>C</italic><sub>j</sub>). We used the estimator (<xref ref-type="fig" rid="figure3">Figure 3</xref>, equations c and d) proposed by De Campos et al [<xref ref-type="bibr" rid="ref33">33</xref>], where α is a normalizing constant (to ensure that ∑<sub>Ti∈Pa(Cj)</sub><italic>w</italic><sub>ij</sub>≤1 ∀ <italic>C</italic><sub>j</sub> ∈ <italic>C</italic>), <italic>tf</italic><sub>ij</sub> is the term frequency of data element <italic>T</italic><sub>i</sub> in concept <italic>C</italic><sub>j</sub>, and <italic>idf</italic><sub>i</sub> is the inverse concept frequency of <italic>T</italic><sub>i</sub> in the whole concept collection; <italic>idf</italic><sub>i</sub> = 1 + log(<italic>N</italic>/<italic>n</italic><sub>i</sub>), <italic>N</italic> being the total number of concepts, and <italic>n</italic><sub>i</sub> being the total number of concepts containing <italic>T</italic><sub>i</sub>.</p>
            <p>For each concept node <italic>C</italic><sup>ʹ</sup><sub>j</sub>, we needed to estimate a set of conditional probability distributions <italic>p</italic>(<italic>c</italic><sup>ʹ</sup><sub>j</sub>|<italic>pa</italic>(<italic>C</italic><sup>ʹ</sup><sub>j</sub>)). We used the estimator (<xref ref-type="fig" rid="figure3">Figure 3</xref>, equation e) proposed by Acid et al [<xref ref-type="bibr" rid="ref36">36</xref>], where <italic>S</italic><sub>j</sub> = ∑<sub>Ck∈Pa(Cʹj)</sub><italic>p</italic>(<italic>c</italic><sub>j</sub>|<italic>e</italic>(<italic>C</italic><sub>k</sub>)) and the values <italic>p</italic>(<italic>c</italic><sub>j</sub>|<italic>e</italic>(<italic>C</italic><sub>k</sub>)) are obtained when modeling the network.</p>
          </sec>
          <sec>
            <title>Archetype Nodes</title>
            <p>For each archetype node <italic>A</italic><sub>k</sub>, we needed to estimate a set of conditional probability distributions <italic>p</italic>(<italic>a</italic><sub>k</sub>| <italic>pa</italic>(<italic>A</italic><sub>k</sub>)). <italic>Pa</italic>(<italic>A</italic><sub>k</sub>) represents the parent node set of archetype <italic>A</italic><sub>k</sub>, containing all the concepts belonging to archetype <italic>A</italic><sub>k</sub>, and <italic>pa</italic>(<italic>A</italic><sub>k</sub>) is a possible configuration of values associated with the parent set <italic>Pa</italic> (<italic>A</italic><sub>k</sub>). <italic>v</italic><sub>jk</sub> is a constant to represent the weight of a concept for an archetype. The estimator is shown in <xref ref-type="fig" rid="figure3">Figure 3</xref>, equations (f) and (g), where <italic>R</italic>(<italic>Pa</italic>(<italic>A</italic><sub>k</sub>), <italic>A</italic><sub>k</sub>) represents two different relationships between the concept and archetype, <italic>n</italic><sub>1</sub> is the number of “nonspecialized” archetypes of one concept, and <italic>n</italic><sub>2</sub> is the number of “specialized” archetypes, whereas α and β are coefficients for the weight.</p>
          </sec>
        </sec>
        <sec>
          <title>Relevant Archetype Discovering: Inference in the Clinical Resources Network</title>
          <p>To find relevant archetypes is to estimate the probability of relevance <italic>p</italic> (<italic>a</italic><sub>k</sub>|Q) for each archetype, <italic>Q</italic> being an end user query.</p>
          <p>Given a query <italic>Q</italic>, the set of terms used to formulate the query will be a new piece of evidence. The retrieval process starts by placing the evidence in the data element subnetwork. Then, the inference process is run in the clinical resources network. This allows us to obtain the probability of relevance of each archetype, given that the terms in the query are relevant, <italic>p</italic> (<italic>a</italic><sub>k</sub>|Q). Finally, the archetypes will be sorted in decreasing order of probability to carry out the evaluation process. The inference process is composed of four stages.</p>
          <list list-type="order">
            <list-item>
              <p>Terms in the data element layer are marginally independent; therefore, the probability of relevance <italic>p</italic>(<italic>t</italic><sub>i</sub>|Q) is calculated by equation (h) in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p>
            </list-item>
            <list-item>
              <p>Based on the propagation process, the conditional probability of concept <italic>C</italic><sub>j</sub> in the concept subnetwork for the query Q could be calculated by equation (i) in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p>
            </list-item>
            <list-item>
              <p>The conditional probability of concept <italic>C</italic><sup>ʹ</sup><sub>j</sub> in the concept subnetwork for the query Q could be computed using equation (j) in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p>
            </list-item>
            <list-item>
              <p>The conditional probability of archetype <italic>A</italic><sub>k</sub> for the query Q, <italic>p</italic>(<italic>a</italic><sub>k</sub>|Q), could be calculated from the information obtained in the previous step by using equation (k) in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p>
            </list-item>
          </list>
          <p>Therefore, the propagation with this topology is to evaluate equations (h), (i), (j), and (k) in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p>
        </sec>
      </sec>
      <sec>
        <title>Experiment Setup</title>
        <sec>
          <title>Test Queries</title>
          <p>We defined test queries with the following considerations: first, clinical concepts to be retrieved should be essential components of the EHR; second, there should be needs to reuse these clinical contents [<xref ref-type="bibr" rid="ref37">37</xref>], such as medical events prediction [<xref ref-type="bibr" rid="ref38">38</xref>], clinical research [<xref ref-type="bibr" rid="ref39">39</xref>], and disease research [<xref ref-type="bibr" rid="ref40">40</xref>]; third, queries should allow us to test the performance of retrieval methods in related archetypes identification, including specialized archetypes and compositional parent archetypes. Based on these criteria, we selected medication, laboratory test, and diagnosis as retrieval tasks and formulated three queries (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        </sec>
        <sec>
          <title>Data Source</title>
          <p>We downloaded all available archetypes from the CKM [<xref ref-type="bibr" rid="ref24">24</xref>] for a total of 526 on August 30, 2018. All files were in ADL format. We used the ADL parser [<xref ref-type="bibr" rid="ref41">41</xref>] to extract features. Among these CIMs, three archetypes did not use English as the description language, so the total number changed to 523.</p>
        </sec>
        <sec>
          <title>Relevance Assessment</title>
          <p>To evaluate retrieval results, we first had to identify relevant archetypes in three retrieval tasks as the gold standard. We manually annotated all 523 archetypes, according to their relevance to each query, to formulate three benchmark datasets. Given a query and an archetype, three annotators were asked to judge if the archetype was relevant. The labeling instructions were as follows: a label was relevant when the archetype could cover the potential clinical concept inferred from the given query; a label was nonrelevant otherwise. We took the majority vote to decide the relevance of an archetype. These three benchmark datasets were used as ground truth for the medication, laboratory test, and diagnosis retrieval tasks.</p>
        </sec>
        <sec>
          <title>Baseline Methods</title>
          <p>To validate the performance of our method, three typical retrieval methods were selected as baselines: CKM, BM25F, and simple Bayesian network.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Test queries.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="200"/>
              <col width="260"/>
              <col width="540"/>
              <thead>
                <tr valign="top">
                  <td>Query</td>
                  <td>Retrieval task</td>
                  <td>Input terms</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>1</td>
                  <td>Medication</td>
                  <td>Medicine name, total daily amount, allowed period, and order start date/time</td>
                </tr>
                <tr valign="top">
                  <td>2</td>
                  <td>Laboratory test</td>
                  <td>Report, test name, and test results</td>
                </tr>
                <tr valign="top">
                  <td>3</td>
                  <td>Diagnosis</td>
                  <td>Problem/diagnosis, test diagnosis, date/time of onset, and body site</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>BM25F is an extension of the BM25 ranking function, which is applicable to structured documents consisting of multiple fields. It combines the term frequencies (weighted according to their field importance) and uses the resulting pseudofrequency in the BM25 ranking function. In this study, we supposed that an archetype was decomposed into two fields, concept and data elements, and used the function (<xref ref-type="fig" rid="figure3">Figure 3</xref>, equations l and m) proposed by Zaragoza et al [<xref ref-type="bibr" rid="ref42">42</xref>], where <italic>w</italic><sub>ti</sub> is the RSJ relevance weight for term <italic>t</italic><sub>i</sub>, <italic>x</italic><sub>ak, f, ti</sub> is the term frequency of term <italic>t</italic><sub>i</sub> in the field type <italic>f</italic> of archetype <italic>a</italic><sub>k</sub>, <italic>l</italic><sub>ak, f</sub> is the length of that field, <italic>l</italic><sub>f</sub> is the average field length for that field type, and <italic>B</italic><sub>f</sub> is a field-dependent parameter.</p>
          <p>For the Bayesian network, the structure is illustrated in <xref ref-type="fig" rid="figure2">Figure 2</xref>. The propagation with this topology is to evaluate equations (h), (i), and (k) in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview of Clinical Resources Network</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the distribution of archetypes across different clinical domains. <italic>Clinical domain classification</italic> refers to the concept schema proposed by Hruby et al [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the distribution of archetypes, concepts, and data elements across different types of archetypes in the collection. In addition, there were 31 specialized archetypes, 11 of whose parent archetypes are no longer in the CKM.</p>
        <p>Then, we learned the dependencies between concepts. <xref ref-type="table" rid="table4">Table 4</xref> shows the top relevant concepts suggested by four different percentages of values of <italic>p</italic>(<italic>c</italic><sub>j</sub>|<italic>e</italic>(<italic>C</italic><sub>i</sub>)) for “dosage” and “examination of a lung,” respectively.</p>
        <p>After that, we constructed four clinical resource networks, G<sub>1</sub>, G<sub>2</sub>, G<sub>3</sub>, and G<sub>4</sub>, according to the top 3%, 5%, 8%, and 10% of values, respectively. Each graph consisted of 5513 nodes, which were 3982 data element nodes, 504 concept nodes, 504 duplicated concept nodes, and 523 archetype nodes, with 6366 edges from T to C and 543 edges from Cʹ to A. For edges C to Cʹ, G<sub>1</sub> had 1590 arcs, G<sub>2</sub> had 2485 arcs, G<sub>3</sub> had 2958 arcs, and G<sub>4</sub> had 3263 arcs.</p>
      </sec>
      <sec>
        <title>Evaluation of the Performance</title>
        <p>To compare the performance of different graphs in supporting retrieval, we calculated the average precision (AP) values for the 11 standard recall points of each graph for the test queries and then computed the mean average precision (MAP) values. The results (<xref ref-type="table" rid="table5">Table 5</xref>) showed that the retrieval method based on G<sub>3</sub> achieved the best MAP (MAP=0.32), with an AP of 0.35, 0.31, and 0.30 for each query, respectively.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Distribution of archetypes across different clinical domains.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="720"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Clinical domain and subdomains</td>
                <td>Archetypes, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2"><bold>Patient</bold></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Demographic</td>
                <td>42</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Health characteristic</td>
                <td>32</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Patient</td>
                <td>6</td>
              </tr>
              <tr valign="top">
                <td colspan="2"><bold>Pretreatment diagnosis</bold></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Clinical assessment</td>
                <td>73</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Pretreatment diagnosis</td>
                <td>26</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Procedure</td>
                <td>6</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Intent</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td colspan="2"><bold>Treatment</bold></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Treatment</td>
                <td>39</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Prescribed</td>
                <td>12</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Surgery</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Detection/Treatment results</td>
                <td>184</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Organizational/Provider characteristics</td>
                <td>26</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Outcomes</td>
                <td>24</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Patient environment factors</td>
                <td>6</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Other</td>
                <td>37</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Total</td>
                <td>523</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Distribution of archetypes, concepts, and data elements.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="270"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Archetype type and subtypes</td>
                <td>Archetypes, n</td>
                <td>Concepts, n</td>
                <td>Elements, n</td>
                <td>Data elements per concept, mean</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Cluster</td>
                <td>198</td>
                <td>198</td>
                <td>1567</td>
                <td>7.9</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Composition</td>
                <td>25</td>
                <td>25</td>
                <td>45</td>
                <td>1.8</td>
              </tr>
              <tr valign="top">
                <td colspan="2"><bold>Entry</bold></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Action</td>
                <td>15</td>
                <td>15</td>
                <td>252</td>
                <td>16.8</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Evaluation</td>
                <td>51</td>
                <td>51</td>
                <td>432</td>
                <td>8.5</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Observation</td>
                <td>164</td>
                <td>163</td>
                <td>1511</td>
                <td>9.3</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Instruction</td>
                <td>8</td>
                <td>8</td>
                <td>124</td>
                <td>15.5</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Admin</td>
                <td>4</td>
                <td>4</td>
                <td>69</td>
                <td>17.3</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Section</td>
                <td>26</td>
                <td>26</td>
                <td>88</td>
                <td>3.4</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Demographic</td>
                <td>32</td>
                <td>29</td>
                <td>169</td>
                <td>5.8</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Total</td>
                <td>523</td>
                <td>504</td>
                <td>3982</td>
                <td>7.9</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Top edge suggestions for “dosage” and “examination of a lung.”</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="210"/>
            <col width="180"/>
            <col width="180"/>
            <col width="180"/>
            <col width="250"/>
            <thead>
              <tr valign="bottom">
                <td>Clinical concept</td>
                <td colspan="4">Different threshold of <italic>p</italic>(<italic>c</italic><sub>j</sub>|<italic>e(C</italic><sub>i</sub>))<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Top 3%</td>
                <td>Top 5%</td>
                <td>Top 8%</td>
                <td>Top 10%</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Dosage</td>
                <td>Dosage</td>
                <td>Dosage</td>
                <td>Dosage</td>
                <td>Dosage</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Medication order</td>
                <td>Medication order</td>
                <td>Medication order</td>
                <td>Medication order</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td><break/></td>
                <td>Therapeutic direction</td>
                <td>Therapeutic direction</td>
                <td>Therapeutic direction</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td>Medication</td>
                <td>Medication</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td>Medication authorization</td>
                <td>Medication authorization</td>
              </tr>
              <tr valign="top">
                <td>Examination of a lung</td>
                <td>Examination of a lung</td>
                <td>Examination of a lung</td>
                <td>Examination of a lung</td>
                <td>Examination of a lung</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Auscultation of lung</td>
                <td>Auscultation of lung</td>
                <td>Auscultation of lung</td>
                <td>Auscultation of lung</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Pulmonary function test</td>
                <td>Pulmonary function test</td>
                <td>Pulmonary function test</td>
                <td>Pulmonary function test</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td rowspan="2">Macroscopic findings-lung cancer</td>
                <td rowspan="2">Macroscopic findings-lung cancer</td>
                <td rowspan="2">Macroscopic findings-lung cancer</td>
                <td>Macroscopic findings-lung cancer</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>Examination findings-posterior chamber of eye</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td>Examination of a breast</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td><break/></td>
                <td>Examination of a burn</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>c<sub>j</sub>=“dosage” and “examination of a lung,” respectively.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Next, we compared the results of our method based on G<sub>3</sub> with baseline methods. To comprehensively validate the performance, we selected the MAP, AP, and precision at 10 (P@10) as evaluation metrics. Archetypes in the CKM are updated regularly, so it is difficult for us to compare the result on the same collection. We searched relevant archetypes in the CKM for the three queries given on December 12, 2018, and evaluated its performance against the ground truth. The result (<xref ref-type="table" rid="table6">Table 6</xref>) shows that our method outperforms all the baseline methods, achieving the best AP and P@10 across different test queries, as well as the best MAP. For instance, for query 1, our method, CKM, Bayesian network, and BM25F achieved a P@10 of 0.50, 0.40, 0.20, and 0.20, respectively. Furthermore, we can observe that the MAP of BM25F (MAP=0.177) and Bayesian network (MAP=0.127) was lower than that of CKM (MAP=0.227), which means that there are limitations in using clinical concepts and data elements to represent each archetype. Our approach takes into account the semantic associations between concepts and effectively compensates for this deficiency.</p>
        <p>The same trend is observed when evaluating precision-recall graphs across all test queries. <xref ref-type="fig" rid="figure5">Figure 5</xref> shows the precision-recall curves evaluated against the ground truth. Here, BM25F falls short in performance. For instance, for a recall of 0.3, our method, CKM, Bayesian network, and BM25F achieved a precision of 0.38, 0.30, 0.05, and 0, respectively. Additionally, the 11-point MAP curve of the Bayesian network is similar to that of our approach, but the performance is much worse than ours. Meanwhile, compared with the curve of the CKM, our curve is smoother and has higher precision when the recall is below 0.6. These results may be explained by the fact that dependencies between concepts could help identify relevant archetypes.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Average precision performance of graphs with different similarity thresholds.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="320"/>
            <col width="180"/>
            <col width="170"/>
            <col width="180"/>
            <col width="150"/>
            <thead>
              <tr valign="bottom">
                <td>Graphs with different similarity thresholds<sup>a</sup></td>
                <td>Mean average precision</td>
                <td colspan="3">Average precision</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td><break/></td>
                <td>Query 1 (medication)</td>
                <td>Query 2 (laboratory test)</td>
                <td>Query 3 (diagnosis)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>G<sub>1</sub> (top 3%)</td>
                <td>0.253</td>
                <td>0.36</td>
                <td>0.10</td>
                <td>0.30</td>
              </tr>
              <tr valign="top">
                <td>G<sub>2</sub> (top 5%)</td>
                <td>0.277</td>
                <td>0.27</td>
                <td>0.26</td>
                <td>0.30</td>
              </tr>
              <tr valign="top">
                <td>G<sub>3</sub> (top 8%)</td>
                <td>0.320</td>
                <td>0.35</td>
                <td>0.31</td>
                <td>0.30</td>
              </tr>
              <tr valign="top">
                <td>G<sub>4</sub> (top 10%)</td>
                <td>0.313</td>
                <td>0.33</td>
                <td>0.31</td>
                <td>0.30</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Graphs with percentages of values of <italic>p</italic>(<italic>c</italic><sub>j</sub>|<italic>e(C</italic><sub>i</sub>)).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Retrieval performance comparison.</p>
          </caption>
          <table width="1000" cellpadding="7" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <thead>
              <tr valign="bottom">
                <td>Method</td>
                <td>MAP<sup>a</sup></td>
                <td colspan="2">Query 1 (medication)</td>
                <td colspan="2">Query 2 (laboratory test)</td>
                <td colspan="2">Query 3 (diagnosis)</td>
              </tr>
              <tr valign="bottom">
                <td><break/></td>
                <td><break/></td>
                <td>AP<sup>b</sup></td>
                <td>P@10<sup>c</sup></td>
                <td>AP</td>
                <td>P@10</td>
                <td>AP</td>
                <td>P@10</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>CKM</td>
                <td>0.227</td>
                <td>0.26</td>
                <td>0.40</td>
                <td>0.31</td>
                <td>0.30</td>
                <td>0.11</td>
                <td>0.10</td>
              </tr>
              <tr valign="top">
                <td>BM25F</td>
                <td>0.177</td>
                <td>0.08</td>
                <td>0.20</td>
                <td>0.18</td>
                <td>0.30</td>
                <td>0.27</td>
                <td>0.30</td>
              </tr>
              <tr valign="top">
                <td>Bayesian network</td>
                <td>0.127</td>
                <td>0.11</td>
                <td>0.20</td>
                <td>0.22</td>
                <td>0.30</td>
                <td>0.05</td>
                <td>0.10</td>
              </tr>
              <tr valign="top">
                <td>Our method</td>
                <td>0.320</td>
                <td>0.35</td>
                <td>0.50</td>
                <td>0.31</td>
                <td>0.50</td>
                <td>0.30</td>
                <td>0.30</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>MAP: mean average precision.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>AP: average precision.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>P@10: precision at 10.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Precision-recall curves of the four retrieval methods. BM25F: an extension of the BM25 ranking function; BN: Bayesian network; CKM: Clinical Knowledge Manager.</p>
          </caption>
          <graphic xlink:href="jmir_v21i5e13504_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The dual model methodology used by openEHR distinguished the clinical content domain from the technical domain, which enabled reusable CIMs (archetypes) [<xref ref-type="bibr" rid="ref9">9</xref>]. We were interested in identifying relevant CIMs online to standardize clinical concept representation within EHRs, so we developed a graphical retrieval method based on an extended Bayesian network and validated its feasibility using an online clinical information knowledge source: OpenEHR CKM. We combined a qualitative representation of the retrieval task, by using a graphical representation of relationships among data elements, concepts, and archetypes, with quantitative representation of the uncertainty of information needs, using a probabilistic approach. Compared with three typical retrieval methods (BM25F, Bayesian network, and CKM) in the medication, laboratory test, and diagnosis retrieval tasks, our method achieved the best MAP (MAP=0.32). In the diagnosis retrieval task, CKM and BM25F could not find the relevant archetype “openEHR-EHR-SECTION.problems_and_diagnoses.v1.” Our method could successfully identify the models covering “diagnostic reports,” “problem list,” “patients background,” “clinical decision,” etc, as well as “problems and diagnoses.”</p>
        <p>Although end users were mainly concerned about whether an archetype covered the concept name and core data items, we could not obtain satisfactory performance without considering any potential knowledge that might be mined from the collection. Here, BM25F and Bayesian network just used clinical concepts and data elements as main features to represent each archetype and performed worse compared with the other models. In the laboratory test retrieval task, the recall of BM25F was 0.158, whereas ours was 1.0 and CKM was 0.895. In the diagnosis retrieval task, the value of precision at 3 of Bayesian network was 0, whereas ours was 1.0 and CKM was 0.333. A possible reason was that we used exact matching instead of fuzzy matching. The most important reason was that they only encoded the dependence relationships between variables and did not take into account the semantic associations between them. Previous studies showed that using the structure of existing knowledge resources and distributional statistics drawn from text corpora could help estimate semantic similarity and relatedness between medical concepts [<xref ref-type="bibr" rid="ref43">43</xref>]. In the openEHR framework, archetypes should map to clinical terminologies (such as SNOMED CT). However, most archetypes currently in the CKM lacked this kind of mapping, which could have limited the calculation of semantic relatedness. In this study, we learned relationships between concepts by a probabilistic approach and constructed a concept subnetwork with two layers. The results showed that the performance significantly improved, which explained the effectiveness of using prior knowledge to improve retrieval results.</p>
        <p>Accordingly, how to find the top <italic>n</italic> concepts relevant to each concept became crucial. We used e(<italic>C</italic><sub>i</sub>) as an event representing some type of evidence about the relevance of a concept <italic>C</italic><sub>i</sub>, and keywords were used as evidence in the experiment. With their help, we could find that the concepts “medication list” and “medication order list” were related, even though their concept names and data elements were totally different. There was also other semantic information that could be used as evidence, such as “purpose” and “use.” How to use them to better support retrieval might need to be further clarified. However, this method could also include in the network some less relevant concepts, as shown in the column “Top 10%” in <xref ref-type="table" rid="table4">Table 4</xref>. For better results, we used AP and MAP as evaluation metrics to help select relevant concepts; meanwhile, we noticed that many concepts had the same values of conditional probabilities. This was because of the probabilistic approach we applied, which reminded us that we could not simply select the top <italic>n</italic> concepts as the relevant ones. Here, we adopted concepts with top <italic>n</italic> percentages of values of conditional probabilities.</p>
        <p>When modeling the clinical resources network, we took the relationship of specialization between archetypes into consideration. It helped us find “openEHR-EHR-COMPOSITION.report-result.v1,” a specialized archetype of “openEHR-EHR-COMPOSITION.report.v1,” which BM25F could not find. In addition, we could also find relevant compositional parent archetypes successfully, even though we did not use the relationship of aggregation. For example, in the diagnosis retrieval task, our method could find “openEHR-EHR-SECTION.clinical_decision.v0,” which defined an archetype slot to allow “openEHR-EHR-EVALUATION.problem_diagnosis.v1.” This was because the compositional archetype used the clinical concept of the allowed archetype as its data element. When we linked the data element node to its corresponding concept node, we in fact modeled the relationship of aggregation.</p>
        <p>The key idea of our approach lay in identifying potentially relevant clinical concepts from the input. In a two-level model methodology, clinicians were usually the end users. In most scenarios, they were not familiar with openEHR archetypes and did not know what archetype-friendly concepts were. This requires the retrieval method to be as insensitive to the input as possible. For example, take the medication retrieval task. If the user inputs “medication item, order start date/time, dosage, dose unit, comment,” using some frequent words in the archetype library, the CKM performed better than ours. The AP value of CKM was 0.82 (P@10=0.7, recall=1) whereas ours was 0.45 (P@10=0.6, recall=1). However, when the user used uncommon words, such as “medicine name” (<xref ref-type="table" rid="table1">Table 1</xref>), our method, CKM, Bayesian network, and BM25F achieved an AP of 0.35, 0.26, 0.11, and 0.08, respectively. In addition, as <xref ref-type="table" rid="table6">Table 6</xref> shows, our AP was almost equal across different retrieval tasks (0.35, 0.31, and 0.30, respectively), whereas the other retrieval methods were not. From the clinical domain, queries 2 and 3 mainly belonged to the topic of detection/treatment results, whereas query 1 belonged to treatment, which indicated that our performance was relatively stable across different clinical domains. All these showed that our method was more robust than the others.</p>
        <p>Additionally, better retrieval results could help users to identify reusable archetypes quickly, promote reuse of archetypes, and improve standardization of CIMs, thereby enhancing interoperability of EHRs. Archetype modeling methodology [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref23">23</xref>] showed that clinicians and domain experts should compare archetype design specifications with retrieved archetypes to decide whether new archetypes need to be developed or whether an existing one could be adapted. Our method could successfully identify relevant archetypes that the CKM could not find, such as “openEHR-EHR-SECTION.problems_and_diagnoses.v1” in the diagnosis retrieval task. If this archetype was what a use case needed, domain experts might create a new one because they thought it did not exist in the CKM. Our method achieved the best recall (recall=1) in different retrieval tasks, which could help reuse archetypes and promote the semantic interoperability of EHRs.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study has important limitations. First, it is a feasibility study based on openEHR archetypes. Whether our method can be applied to other CIMs, such as HL7 templates, and to what extent it needs to be localized still need to be clarified and validated. In fact, the key features used in our method are data elements, clinical concepts, CIMs (archetypes), and their relationships. It indicates that our method has potential feasibility if these features are available for other CIMs. Which results are potentially possible will be discussed in future work.</p>
        <p>Second, our method presented in this study lacks the calculation of the semantic relevance of synonyms or homonyms, both for queries and network modeling. However, relevant semantic computing methods [<xref ref-type="bibr" rid="ref43">43</xref>] can be applied to our retrieval method. With their help, we may be able to identify that “medication item” and “medicine item” referred to the same term, and the results would be improved. In the future, we will validate its feasibility and effectiveness.</p>
        <p>Third, we did not validate the impact of our method on interoperability. In fact, the basic problem of semantic interoperability in EHRs must be solved from the perspective of the business domains the concepts originally belong to. Our approach only addresses specific technical issues in the CIM modeling process.</p>
        <p>Furthermore, there are other limitations. First, the relevant archetypes in the three retrieval tasks that we manually annotated may be controversial, according to different experts. Second, we compared our performances with the CKM on different archetype collections, which may lead to inaccurate results.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this paper, we proposed an extended Bayesian network retrieval method for finding relevant CIMs. We graphically represented openEHR archetypes using an extended Bayesian network with two concept layers. The results show that it is an effective approach to meet the uncertainty of retrieval tasks, and the key step in modeling this network is to learn the dependencies between concepts. Our better retrieval results could encourage clinicians and domain experts to reuse existing CIMs to represent EHR data in a standard manner, thereby enhancing the interoperability of EHRs. Furthermore, our study described how the inference process was carried out. Compared with the baseline methods, our method achieved the best performance. To optimize the method, further research should focus on the potential feasibility for other CIMs and the calculation of semantic relevance of synonyms or homonyms.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADL</term>
          <def>
            <p>Archetype Definition Language</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AP</term>
          <def>
            <p>average precision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CIM</term>
          <def>
            <p>clinical information model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CKM</term>
          <def>
            <p>Clinical Knowledge Manager</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">HL7</term>
          <def>
            <p>Health Level Seven</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MAP</term>
          <def>
            <p>mean average precision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">P@10</term>
          <def>
            <p>precision at 10</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">RM</term>
          <def>
            <p>reference model</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research is supported by the Chinese Academy of Medical Sciences (grant #2017PT63010, 2018PT33024) and the National Key R&amp;D Program of China (grant #2016YFC0901901, 2017YFC0907503).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sherman</surname>
            <given-names>RE</given-names>
          </name>
          <name name-style="western">
            <surname>Anderson</surname>
            <given-names>SA</given-names>
          </name>
          <name name-style="western">
            <surname>Dal Pan</surname>
            <given-names>GJ</given-names>
          </name>
          <name name-style="western">
            <surname>Gray</surname>
            <given-names>GW</given-names>
          </name>
          <name name-style="western">
            <surname>Gross</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Hunter</surname>
            <given-names>NL</given-names>
          </name>
          <name name-style="western">
            <surname>LaVange</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Marinac-Dabic</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Marks</surname>
            <given-names>PW</given-names>
          </name>
          <name name-style="western">
            <surname>Robb</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Shuren</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Temple</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Woodcock</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Yue</surname>
            <given-names>LQ</given-names>
          </name>
          <name name-style="western">
            <surname>Califf</surname>
            <given-names>RM</given-names>
          </name>
        </person-group>
        <article-title>Real-world evidence—what is it and what can it tell us?</article-title>
        <source>N Engl J Med</source>  
        <year>2016</year>  
        <month>12</month>  
        <day>08</day>  
        <volume>375</volume>  
        <issue>23</issue>  
        <fpage>2293</fpage>  
        <lpage>2297</lpage>  
        <pub-id pub-id-type="doi">10.1056/NEJMsb1609216</pub-id>
        <pub-id pub-id-type="medline">27959688</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
        <source>Framework for FDA’s Real-World Evidence Program</source>  
        <year>2018</year>  
        <month>12</month>  
        <access-date>2019-05-15</access-date>
        <publisher-loc>Silver Spring, MD</publisher-loc>
        <publisher-name>US Food and Drug Administration</publisher-name>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/media/120060/download">https://www.fda.gov/media/120060/download</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78NqFnwnD"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Verheij</surname>
            <given-names>RA</given-names>
          </name>
          <name name-style="western">
            <surname>Curcin</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Delaney</surname>
            <given-names>BC</given-names>
          </name>
          <name name-style="western">
            <surname>McGilchrist</surname>
            <given-names>MM</given-names>
          </name>
        </person-group>
        <article-title>Possible sources of bias in primary care electronic health record data use and reuse</article-title>
        <source>J Med Internet Res</source>  
        <year>2018</year>  
        <month>05</month>  
        <day>29</day>  
        <volume>20</volume>  
        <issue>5</issue>  
        <fpage>e185</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2018/5/e185/"/>
        </comment>  
        <pub-id pub-id-type="doi">10.2196/jmir.9134</pub-id>
        <pub-id pub-id-type="medline">29844010</pub-id>
        <pub-id pub-id-type="pii">v20i5e185</pub-id>
        <pub-id pub-id-type="pmcid">PMC5997930</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
        <source>European Commission</source>  
        <year>2012</year>  
        <month>12</month>  
        <day>07</day>  
        <access-date>2019-05-18</access-date>
        <comment>eHealth Action Plan 2012-2020: innovative healthcare for the 21st century 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://ec.europa.eu/digital-single-market/en/news/ehealth-action-plan-2012-2020-innovative-healthcare-21st-century">https://ec.europa.eu/digital-single-market/en/news/ehealth-action-plan-2012-2020-innovative-healthcare-21st-century</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78SkxTsZO"/></comment> </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>He</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Baxter</surname>
            <given-names>SL</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Zhou</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>The practical implementation of artificial intelligence technologies in medicine</article-title>
        <source>Nat Med</source>  
        <year>2019</year>  
        <month>01</month>  
        <volume>25</volume>  
        <issue>1</issue>  
        <fpage>30</fpage>  
        <lpage>36</lpage>  
        <pub-id pub-id-type="doi">10.1038/s41591-018-0307-0</pub-id>
        <pub-id pub-id-type="medline">30617336</pub-id>
        <pub-id pub-id-type="pii">10.1038/s41591-018-0307-0</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
        <source>International Organization for Standardization</source>  
        <year>2011</year>  
        <month>04</month>  
        <access-date>2019-05-15</access-date>
        <comment>ISO 18308:2011 health informatics-requirements for an electronic health record architecture 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.iso.org/standard/52823.html">https://www.iso.org/standard/52823.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78NrkRGeJ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rector</surname>
            <given-names>AL</given-names>
          </name>
          <name name-style="western">
            <surname>Nowlan</surname>
            <given-names>WA</given-names>
          </name>
          <name name-style="western">
            <surname>Kay</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Goble</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Howkins</surname>
            <given-names>TJ</given-names>
          </name>
        </person-group>
        <article-title>A framework for modelling the electronic medical record</article-title>
        <source>Methods Inf Med</source>  
        <year>1993</year>  
        <month>04</month>  
        <volume>32</volume>  
        <issue>2</issue>  
        <fpage>109</fpage>  
        <lpage>119</lpage>  
        <pub-id pub-id-type="medline">8321129</pub-id>
        <pub-id pub-id-type="pii">93020109</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Goossen</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Goossen-Baremans</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>van der Zel</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Detailed clinical models: a review</article-title>
        <source>Healthc Inform Res</source>  
        <year>2010</year>  
        <month>12</month>  
        <volume>16</volume>  
        <issue>4</issue>  
        <fpage>201</fpage>  
        <lpage>214</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.e-hir.org/DOIx.php?id=10.4258/hir.2010.16.4.201"/>
        </comment>  
        <pub-id pub-id-type="doi">10.4258/hir.2010.16.4.201</pub-id>
        <pub-id pub-id-type="medline">21818440</pub-id>
        <pub-id pub-id-type="pmcid">PMC3092133</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Leslie</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <source>ResearchGate</source>  
        <year>2014</year>  
        <month>07</month>  
        <access-date>2019-05-15</access-date>
        <comment>The openEHR approach 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/277667443_The_openEHR_approach">https://www.researchgate.net/publication/277667443_The_openEHR_approach</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78NsTRNDB"/></comment> </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
        <source>International Organization for Standardization</source>  
        <access-date>2019-05-15</access-date>
        <comment>ISO 13606 Standard 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.iso.org/home.html">https://www.iso.org/home.html</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78Nsir48b"/></comment> </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
        <source>OpenEHR</source>  
        <access-date>2019-05-15</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.openehr.org/">https://www.openehr.org/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78Nt2sQ3h"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
        <source>Health Level Seven</source>  
        <access-date>2019-05-15</access-date>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.hl7.org/">http://www.hl7.org/</ext-link>
          <ext-link ext-link-type="webcite" xlink:href="78NtFA8bQ"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Moreno-Conde</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Moner</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Cruz</surname>
            <given-names>WD</given-names>
          </name>
          <name name-style="western">
            <surname>Santos</surname>
            <given-names>MR</given-names>
          </name>
          <name name-style="western">
            <surname>Maldonado</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Robles</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kalra</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Clinical information modeling processes for semantic interoperability of electronic health records: systematic review and inductive analysis</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2015</year>  
        <month>07</month>  
        <volume>22</volume>  
        <issue>4</issue>  
        <fpage>925</fpage>  
        <lpage>934</lpage>  
        <pub-id pub-id-type="doi">10.1093/jamia/ocv008</pub-id>
        <pub-id pub-id-type="medline">25796595</pub-id>
        <pub-id pub-id-type="pii">ocv008</pub-id></nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Min</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Duan</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Archetype relational mapping—a practical openEHR persistence solution</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2015</year>  
        <month>11</month>  
        <day>05</day>  
        <volume>15</volume>  
        <fpage>88</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-015-0212-0"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s12911-015-0212-0</pub-id>
        <pub-id pub-id-type="medline">26541142</pub-id>
        <pub-id pub-id-type="pii">10.1186/s12911-015-0212-0</pub-id>
        <pub-id pub-id-type="pmcid">PMC4636072</pub-id></nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Min</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Tian</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>An</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Duan</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>An openEHR based approach to improve the semantic interoperability of clinical data registry</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2018</year>  
        <month>03</month>  
        <day>22</day>  
        <volume>18</volume>  
        <issue>Suppl 1</issue>  
        <fpage>15</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-018-0596-8"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/s12911-018-0596-8</pub-id>
        <pub-id pub-id-type="medline">29589572</pub-id>
        <pub-id pub-id-type="pii">10.1186/s12911-018-0596-8</pub-id>
        <pub-id pub-id-type="pmcid">PMC5872380</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cardoso de Moraes</surname>
            <given-names>JL</given-names>
          </name>
          <name name-style="western">
            <surname>de Souza</surname>
            <given-names>WL</given-names>
          </name>
          <name name-style="western">
            <surname>Pires</surname>
            <given-names>LF</given-names>
          </name>
          <name name-style="western">
            <surname>do Prado</surname>
            <given-names>AF</given-names>
          </name>
        </person-group>
        <article-title>A methodology based on openEHR archetypes and software agents for developing e-health applications reusing legacy systems</article-title>
        <source>Comput Methods Programs Biomed</source>  
        <year>2016</year>  
        <month>10</month>  
        <volume>134</volume>  
        <fpage>267</fpage>  
        <lpage>287</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0169-2607(16)30298-X"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.cmpb.2016.07.013</pub-id>
        <pub-id pub-id-type="medline">27480749</pub-id>
        <pub-id pub-id-type="pii">S0169-2607(16)30298-X</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Min</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Duan</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Qiao</surname>
            <given-names>Q</given-names>
          </name>
        </person-group>
        <article-title>An implementation of clinical data repository with openehr approach: from data modeling to architecture</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2016</year>  
        <volume>227</volume>  
        <fpage>100</fpage>  
        <lpage>105</lpage>  
        <pub-id pub-id-type="medline">27440296</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Marco-Ruiz</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Moner</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Maldonado</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Kolstrup</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Bellika</surname>
            <given-names>JG</given-names>
          </name>
        </person-group>
        <article-title>Archetype-based data warehouse environment to enable the reuse of electronic health record data</article-title>
        <source>Int J Med Inform</source>  
        <year>2015</year>  
        <month>09</month>  
        <volume>84</volume>  
        <issue>9</issue>  
        <fpage>702</fpage>  
        <lpage>714</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2015.05.016</pub-id>
        <pub-id pub-id-type="medline">26094821</pub-id>
        <pub-id pub-id-type="pii">S1386-5056(15)30005-8</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wulff</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Haarbrandt</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Tute</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Marschollek</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Beerbaum</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Jack</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>An interoperable clinical decision-support system for early detection of SIRS in pediatric intensive care using openEHR</article-title>
        <source>Artif Intell Med</source>  
        <year>2018</year>  
        <month>07</month>  
        <volume>89</volume>  
        <fpage>10</fpage>  
        <lpage>23</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0933-3657(17)30440-2"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.artmed.2018.04.012</pub-id>
        <pub-id pub-id-type="medline">29753616</pub-id>
        <pub-id pub-id-type="pii">S0933-3657(17)30440-2</pub-id></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Klein</surname>
            <given-names>GO</given-names>
          </name>
          <name name-style="western">
            <surname>Sundvall</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Karlsson</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Ahlfeldt</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Archetype-based conversion of EHR content models: pilot experience with a regional EHR system</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2009</year>  
        <month>07</month>  
        <day>01</day>  
        <volume>9</volume>  
        <fpage>33</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-9-33"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6947-9-33</pub-id>
        <pub-id pub-id-type="medline">19570196</pub-id>
        <pub-id pub-id-type="pii">1472-6947-9-33</pub-id>
        <pub-id pub-id-type="pmcid">PMC2715396</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Saalfeld</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Tute</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Wolf</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Marschollek</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Introducing a method for transformation of paper-based research data into concept-based representation with openEHR</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2017</year>  
        <volume>235</volume>  
        <fpage>151</fpage>  
        <lpage>155</lpage>  
        <pub-id pub-id-type="medline">28423773</pub-id></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Marcos</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Martínez-Salvador</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Riaño</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>ten Teije</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Miksch</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Peleg</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Towards the interoperability of computerised guidelines and electronic health records: an experiment with openEHR archetypes and a chronic heart failure guideline</article-title>
        <source>Knowledge Representation for Health-Care. KR4HC 2010. Lecture Notes in Computer Science</source>  
        <year>2011</year>  
        <publisher-loc>Berlin</publisher-loc>
        <publisher-name>Springer</publisher-name>
        <fpage>101</fpage>  
        <lpage>113</lpage> </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Moner</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Maldonado</surname>
            <given-names>JA</given-names>
          </name>
          <name name-style="western">
            <surname>Robles</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Archetype modeling methodology</article-title>
        <source>J Biomed Inform</source>  
        <year>2018</year>  
        <month>12</month>  
        <volume>79</volume>  
        <fpage>71</fpage>  
        <lpage>81</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30022-4"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2018.02.003</pub-id>
        <pub-id pub-id-type="medline">29454107</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(18)30022-4</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
        <source>openEHR</source>  
        <access-date>2019-05-18</access-date>
        <comment>Clinical Knowledge Manager 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.openehr.org/ckm/">https://www.openehr.org/ckm/</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78SlOqhgw"/></comment> </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Teodoro</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Sundvall</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>João Junior</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ruch</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Miranda Freire</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>ORBDA: an openEHR benchmark dataset for performance assessment of electronic health record servers</article-title>
        <source>PLoS One</source>  
        <year>2018</year>  
        <volume>13</volume>  
        <issue>1</issue>  
        <fpage>e0190028</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0190028"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0190028</pub-id>
        <pub-id pub-id-type="medline">29293556</pub-id>
        <pub-id pub-id-type="pii">PONE-D-17-16299</pub-id>
        <pub-id pub-id-type="pmcid">PMC5749730</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Maranhão</surname>
            <given-names>PA</given-names>
          </name>
          <name name-style="western">
            <surname>Bacelar-Silva</surname>
            <given-names>GM</given-names>
          </name>
          <name name-style="western">
            <surname>Ferreira</surname>
            <given-names>DN</given-names>
          </name>
          <name name-style="western">
            <surname>Calhau</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Vieira-Marques</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Cruz-Correia</surname>
            <given-names>RJ</given-names>
          </name>
        </person-group>
        <article-title>Nutrigenomic information in the openEHR data set</article-title>
        <source>Appl Clin Inform</source>  
        <year>2018</year>  
        <month>01</month>  
        <volume>9</volume>  
        <issue>1</issue>  
        <fpage>221</fpage>  
        <lpage>231</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29590680"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1055/s-0038-1635115</pub-id>
        <pub-id pub-id-type="medline">29590680</pub-id>
        <pub-id pub-id-type="pmcid">PMC5874138</pub-id></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Pahl</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Zare</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Nilashi</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>de Faria Borges</surname>
            <given-names>MA</given-names>
          </name>
          <name name-style="western">
            <surname>Weingaertner</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Detschew</surname>
            <given-names>V</given-names>
          </name>
          <name name-style="western">
            <surname>Supriyanto</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Ibrahim</surname>
            <given-names>O</given-names>
          </name>
        </person-group>
        <article-title>Role of OpenEHR as an open source solution for the regional modelling of patient data in obstetrics</article-title>
        <source>J Biomed Inform</source>  
        <year>2015</year>  
        <month>06</month>  
        <volume>55</volume>  
        <fpage>174</fpage>  
        <lpage>187</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00071-4"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.jbi.2015.04.004</pub-id>
        <pub-id pub-id-type="medline">25900270</pub-id>
        <pub-id pub-id-type="pii">S1532-0464(15)00071-4</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Finlayson</surname>
            <given-names>SG</given-names>
          </name>
          <name name-style="western">
            <surname>LePendu</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Shah</surname>
            <given-names>NH</given-names>
          </name>
        </person-group>
        <article-title>Building the graph of medicine from millions of clinical narratives</article-title>
        <source>Sci Data</source>  
        <year>2014</year>  
        <volume>1</volume>  
        <fpage>140032</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25977789"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1038/sdata.2014.32</pub-id>
        <pub-id pub-id-type="medline">25977789</pub-id>
        <pub-id pub-id-type="pmcid">PMC4322575</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Goodwin</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Harabagiu</surname>
            <given-names>SM</given-names>
          </name>
        </person-group>
        <article-title>Automatic generation of a qualified medical knowledge graph and its usage for retrieving patient cohorts from electronic medical records</article-title>
        <year>2013</year>  
        <month>09</month>  
        <day>16</day>  
        <conf-name>IEEE Seventh International Conference on Semantic Computing</conf-name>
        <conf-date>Sep 16-18, 2013</conf-date>
        <conf-loc>Irvine, CA</conf-loc>
        <fpage>978</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.hlt.utdallas.edu/~travis/papers/icsc_2013.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rotmensch</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Halpern</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Tlimat</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Horng</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Sontag</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Learning a health knowledge graph from electronic medical records</article-title>
        <source>Sci Rep</source>  
        <year>2017</year>  
        <month>07</month>  
        <day>20</day>  
        <volume>7</volume>  
        <issue>1</issue>  
        <fpage>5994</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/s41598-017-05778-z"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1038/s41598-017-05778-z</pub-id>
        <pub-id pub-id-type="medline">28729710</pub-id>
        <pub-id pub-id-type="pii">10.1038/s41598-017-05778-z</pub-id>
        <pub-id pub-id-type="pmcid">PMC5519723</pub-id></nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Turtle</surname>
            <given-names>HR</given-names>
          </name>
          <name name-style="western">
            <surname>Croft</surname>
            <given-names>WB</given-names>
          </name>
        </person-group>
        <article-title>Efficient probabilistic inference for text retrieval</article-title>
        <source>Proceedings RIAO '91 Intelligent Text and Image Handling</source>  
        <year>1991</year>  
        <conf-name>RIAO '91 Intelligent Text and Image Handling</conf-name>
        <conf-date>Apr 2-5, 1991</conf-date>
        <conf-loc>Barcelona, Spain</conf-loc>
        <fpage>644</fpage>  
        <lpage>661</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/citation.cfm?id=3171012"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>de Campos</surname>
            <given-names>LM</given-names>
          </name>
          <name name-style="western">
            <surname>Fernandez-Luna</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Huete</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>The BNR model: foundations and performance of a bayesian network-based retrieval model</article-title>
        <source>Int J Approx Reason</source>  
        <year>2003</year>  
        <month>11</month>  
        <volume>34</volume>  
        <issue>2-3</issue>  
        <fpage>265</fpage>  
        <lpage>285</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://ac.els-cdn.com/S0888613X03000951/1-s2.0-S0888613X03000951-main.pdf?_tid=40382984-b7b4-453d-b5db-9afd954688ab&amp;acdnat=1545658521_4e130c227c68a38115e267b8915873e2"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>de Campos</surname>
            <given-names>LM</given-names>
          </name>
          <name name-style="western">
            <surname>Fernandez-Luna</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Huete</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Clustering terms in the bayesian network retrieval model: a new approach with two term-layers</article-title>
        <source>Appl Soft Comput</source>  
        <year>2004</year>  
        <month>05</month>  
        <volume>4</volume>  
        <issue>2</issue>  
        <fpage>149</fpage>  
        <lpage>158</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/222688641_Clustering_terms_in_the_Bayesian_network_retrieval_model_A_new_approach_with_two_term-layers"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Garrouch</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Omri</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Bayesian network based information retrieval model</article-title>
        <year>2017</year>  
        <conf-name>International Conference on High Performance Computing &amp; Simulation</conf-name>
        <conf-date>July 17, 2017</conf-date>
        <conf-loc>Genoa, Italy</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/317185437_Bayesian_Network_Based_Information_Retrieval_Model"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Tang</surname>
            <given-names>WS</given-names>
          </name>
        </person-group>
        <article-title>A word similarity based belief network IR model with two term layers</article-title>
        <year>2009</year>  
        <conf-name>WRI Global Congress on Intelligent Systems</conf-name>
        <conf-date>May 19-21, 2009</conf-date>
        <conf-loc>Xiamen, China</conf-loc>
        <fpage>19</fpage>  
        <lpage>21</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/5209386"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Acid</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>de Campos</surname>
            <given-names>LM</given-names>
          </name>
          <name name-style="western">
            <surname>Fernandez-Luna</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>An information retrieval model based on simple Bayesian networks</article-title>
        <source>Int J Intell Syst</source>  
        <year>2003</year>  
        <volume>18</volume>  
        <issue>2</issue>  
        <fpage>251</fpage>  
        <lpage>265</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.180.7335&amp;rep=rep1&amp;type=pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Meystre</surname>
            <given-names>SM</given-names>
          </name>
          <name name-style="western">
            <surname>Lovis</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Bürkle</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Tognola</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Budrionis</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Lehmann</surname>
            <given-names>CU</given-names>
          </name>
        </person-group>
        <article-title>Clinical data reuse or secondary use: current status and potential future progress</article-title>
        <source>Yearb Med Inform</source>  
        <year>2017</year>  
        <month>08</month>  
        <volume>26</volume>  
        <issue>1</issue>  
        <fpage>38</fpage>  
        <lpage>52</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.15265/IY-2017-007"/>
        </comment>  
        <pub-id pub-id-type="doi">10.15265/IY-2017-007</pub-id>
        <pub-id pub-id-type="medline">28480475</pub-id>
        <pub-id pub-id-type="pmcid">PMC6239225</pub-id></nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rajkomar</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Oren</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <article-title>Scalable and accurate deep learning with electronic health records</article-title>
        <source>NPJ Digital Medicine</source>  
        <year>2018</year>  
        <month>05</month>  
        <volume>1</volume>  
        <issue>1</issue>  
        <fpage>18</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nature.com/articles/s41746-018-0029-1"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hruby</surname>
            <given-names>GW</given-names>
          </name>
          <name name-style="western">
            <surname>Hoxha</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ravichandran</surname>
            <given-names>PC</given-names>
          </name>
          <name name-style="western">
            <surname>Mendonça</surname>
            <given-names>EA</given-names>
          </name>
          <name name-style="western">
            <surname>Hanauer</surname>
            <given-names>DA</given-names>
          </name>
          <name name-style="western">
            <surname>Weng</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <article-title>A data-driven concept schema for defining clinical research data needs</article-title>
        <source>Int J Med Inform</source>  
        <year>2016</year>  
        <month>07</month>  
        <volume>91</volume>  
        <fpage>1</fpage>  
        <lpage>9</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27185504"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2016.03.008</pub-id>
        <pub-id pub-id-type="medline">27185504</pub-id>
        <pub-id pub-id-type="pii">S1386-5056(16)30049-1</pub-id>
        <pub-id pub-id-type="pmcid">PMC4873775</pub-id></nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Denaxas</surname>
            <given-names>SC</given-names>
          </name>
          <name name-style="western">
            <surname>Morley</surname>
            <given-names>KI</given-names>
          </name>
        </person-group>
        <article-title>Big biomedical data and cardiovascular disease research: opportunities and challenges</article-title>
        <source>Eur Heart J Qual Care Clin Outcomes</source>  
        <year>2015</year>  
        <month>07</month>  
        <day>01</day>  
        <volume>1</volume>  
        <issue>1</issue>  
        <fpage>9</fpage>  
        <lpage>16</lpage>  
        <pub-id pub-id-type="doi">10.1093/ehjqcco/qcv005</pub-id>
        <pub-id pub-id-type="medline">29474568</pub-id>
        <pub-id pub-id-type="pii">1860292</pub-id></nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
        <source>GitHub</source>  
        <access-date>2019-05-15</access-date>
        <comment>Adl-parser 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/openEHR/java-libs/tree/master/adl-parser">https://github.com/openEHR/java-libs/tree/master/adl-parser</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="78NxYGEFV"/></comment> </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Zaragoza</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Craswell</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Taylor</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Microsoft Cambridge at TREC 2004: Web and HARD track</article-title>
        <year>2004</year>  
        <conf-name>TREC 2004</conf-name>
        <conf-date>Nov 16, 2004</conf-date>
        <conf-loc>Gaithersburg, MD</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec13/papers/microsoft-cambridge.web.hard.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Wallace</surname>
            <given-names>BC</given-names>
          </name>
          <name name-style="western">
            <surname>Johnson</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Cohen</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Retrofitting concept vector representations of medical concepts to improve estimates of semantic similarity and relatedness</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2017</year>  
        <volume>245</volume>  
        <fpage>657</fpage>  
        <lpage>661</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29295178"/>
        </comment>  
        <pub-id pub-id-type="medline">29295178</pub-id>
        <pub-id pub-id-type="pmcid">PMC6464117</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
