<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i10e25378</article-id>
      <article-id pub-id-type="pmid">34714247</article-id>
      <article-id pub-id-type="doi">10.2196/25378</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Developing a RadLex-Based Named Entity Recognition Tool for Mining Textual Radiology Reports: Development and Performance Evaluation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Torii</surname>
            <given-names>Manabu</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Feizi Derakhshi</surname>
            <given-names>Ali Reza</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yang</surname>
            <given-names>Chen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Alex</surname>
            <given-names>Beatrice</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Tsuji</surname>
            <given-names>Shintaro</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2809-2122</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Wen</surname>
            <given-names>Andrew</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9090-8028</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Takahashi</surname>
            <given-names>Naoki</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7946-6078</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Hongjian</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1322-6634</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Ogasawara</surname>
            <given-names>Katsuhiko</given-names>
          </name>
          <degrees>MBA, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5474-7861</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>Guoqian</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Health Sciences Research</institution>
            <institution>Department of Radiology</institution>
            <addr-line>200 First Street, SW</addr-line>
            <addr-line>Rochester, MN </addr-line>
            <country>United States</country>
            <phone>1 507 266 1327</phone>
            <fax>1 507 284 1516</fax>
            <email>Jiang.Guoqian@mayo.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2940-0019</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Health Sciences Research</institution>
        <institution>Department of Radiology</institution>
        <addr-line>Rochester, MN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Graduate School of Health Sciences</institution>
        <institution>Hokkaido University</institution>
        <addr-line>Sapporo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Radiology</institution>
        <institution>Mayo Clinic</institution>
        <addr-line>Rochester, MN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Guoqian Jiang <email>Jiang.Guoqian@mayo.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>10</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>29</day>
        <month>10</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>10</issue>
      <elocation-id>e25378</elocation-id>
      <history>
        <date date-type="received">
          <day>10</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>3</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>27</day>
          <month>7</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Shintaro Tsuji, Andrew Wen, Naoki Takahashi, Hongjian Zhang, Katsuhiko Ogasawara, Guoqian Jiang. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 29.10.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/10/e25378" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Named entity recognition (NER) plays an important role in extracting the features of descriptions such as the name and location of a disease for mining free-text radiology reports. However, the performance of existing NER tools is limited because the number of entities that can be extracted depends on the dictionary lookup. In particular, the recognition of compound terms is very complicated because of the variety of patterns.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study is to develop and evaluate an NER tool concerned with compound terms using RadLex for mining free-text radiology reports.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We leveraged the clinical Text Analysis and Knowledge Extraction System (cTAKES) to develop customized pipelines using both RadLex and SentiWordNet (a general purpose dictionary). We manually annotated 400 radiology reports for compound terms in noun phrases and used them as the gold standard for performance evaluation (precision, recall, and F-measure). In addition, we created a compound terms–enhanced dictionary (CtED) by analyzing false negatives and false positives and applied it to another 100 radiology reports for validation. We also evaluated the stem terms of compound terms by defining two measures: occurrence ratio (OR) and matching ratio (MR).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The F-measure of cTAKES+RadLex+general purpose dictionary was 30.9% (precision 73.3% and recall 19.6%) and that of the combined CtED was 63.1% (precision 82.8% and recall 51%). The OR indicated that the stem terms of <italic>effusion</italic>, <italic>node</italic>, <italic>tube</italic>, and <italic>disease</italic> were used frequently, but the dictionaries still lacked compound terms containing these stem terms. The MR showed that 71.85% (9411/13,098) of the stem terms matched with that of the ontologies, and RadLex improved approximately 22% of the MR from the cTAKES default dictionary. The OR and MR revealed that the characteristics of stem terms would have the potential to help generate synonymous phrases using the ontologies.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We developed a RadLex-based customized pipeline for parsing radiology reports and demonstrated that CtED and stem term analysis have the potential to improve dictionary-based NER performance with regard to expanding vocabularies.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>named entity recognition (NER)</kwd>
        <kwd>natural language processing (NLP)</kwd>
        <kwd>RadLex</kwd>
        <kwd>ontology</kwd>
        <kwd>stem term</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The widespread adoption of electronic medical record (EMR) systems in recent years has increasingly brought opportunities to research communities regarding the secondary use of EMR data such as medical images and clinical notes [<xref ref-type="bibr" rid="ref1">1</xref>] to support clinical and translational research. It is expected that real-world data will contribute to generating medical evidence, optimizing the use of medical resources, and creating high-quality diagnostic or treatment guidelines [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. To establish effective retrieval and extraction of such data stored in the EMR, standard codes are usually used to describe patient records and make them computable and interpretable. For example, the <italic>International Classification of Diseases</italic> is a standard code system used to classify diseases or diagnoses for medical records [<xref ref-type="bibr" rid="ref4">4</xref>]. The <italic>International Classification of Diseases</italic> can be used to identify classified disease names from medical records in information-retrieval applications. In addition, a standard code system can also be used to extract features from medical texts such as pathology reports, radiology reports, and family history reports. For example, SNOMED-CT (Systematized Nomenclature of Medicine-Clinical Terms) is a standard terminology in the field of medical care [<xref ref-type="bibr" rid="ref5">5</xref>], which is often used as a resource for the automatic named entity recognition (NER) of medical texts [<xref ref-type="bibr" rid="ref6">6</xref>]. Moreover, SNOMED-CT is formalized as an ontology, which has a hierarchical structure of terms and semantic relationships between terms. Such an ontology supports medical reasoning with standard concept definitions and axioms among concepts.</p>
        <p>In the field of radiology, a large amount of medical imaging data and diagnostic reporting data is stored in the EMR, which has become an important data source for acquiring knowledge. The use of standard code systems is critical for the effective mining of the data source. RadLex, produced by the <italic>Radiological Society of North America</italic>, is a controlled-standard biomedical ontology that provides codes, conceptual relationships, and procedures of imaging examinations [<xref ref-type="bibr" rid="ref7">7</xref>]. RadLex was historically developed as indexing teaching files for radiologists, provided by the <italic>American College of Radiology</italic> [<xref ref-type="bibr" rid="ref8">8</xref>]. Currently, RadLex is widely used to support the creation of templates for generating radiology reports [<xref ref-type="bibr" rid="ref9">9</xref>], mining radiology reporting data [<xref ref-type="bibr" rid="ref10">10</xref>], indexing medical images and reports [<xref ref-type="bibr" rid="ref11">11</xref>], and standardizing examination descriptions [<xref ref-type="bibr" rid="ref12">12</xref>]. From the perspective of data interoperability in the radiology domain, RadLex is a unique ontology in that it enables semantic parsing of free-text radiology reports by playing a role in integrating identified entities into a higher-level semantic concept such as <italic>anatomical entities</italic>, <italic>clinical findings</italic>, <italic>imaging observation</italic>, and <italic>procedures</italic>.</p>
        <p>NER is usually used for preprocessing unstructured data for machine learning research, for example, extracting features from radiology reports [<xref ref-type="bibr" rid="ref13">13</xref>]. In a previous study on the NER evaluation based on radiologist agreement, it was reported that the F-measure of dictionary-based NER was lower than that of conditional random fields (CRFs) [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>] and rule-based natural language processing (NLP) [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. However, machine learning–based NER does not provide a relationship between terms, and the reason for the F-measure of dictionary-based NER being lower than that of machine learning–based NER is that it is difficult to identify various patterns of compound terms using standard terminologies or ontologies. For example, in the case of the compound term <italic>right-sided IJ central venous catheter</italic>, all the words in the term except for <italic>catheter</italic> are modifiers. In short, there are several patterns such as <italic>IJ</italic> <italic>central venous catheter</italic> and <italic>venous catheter</italic> that can be identified as annotations by radiologists.</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>Although an ontology such as RadLex can be leveraged to enhance data interoperability and track relationships and hierarchical structure, we consider that the ontology should also be applied to improve the NER of compound terms in radiology reports. However, few studies have been conducted to evaluate the coverage of RadLex for the NER of compound terms for mining radiology reports. To evaluate and extend the coverage of the lexicon for extracting features from radiology reports, the aim of this study is to develop and assess an NER tool based on RadLex, explore the entities included in RadLex, and subsequently extend the ontology for a higher F-measure on feature extraction by dictionary-based NER.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>RadLex Features</title>
        <p>RadLex is a controlled-standard biomedical ontology produced by the <italic>Radiological Society of North America</italic>, which provides unique codes, conceptual mapping based on hierarchical structure, and procedures of imaging examinations [<xref ref-type="bibr" rid="ref7">7</xref>]. We used and analyzed RadLex version 1.3.4 [<xref ref-type="bibr" rid="ref18">18</xref>], which includes 46,434 primary terms and 42,831 compound terms.</p>
      </sec>
      <sec>
        <title>General Purpose Dictionary</title>
        <p>We used a general purpose dictionary (GPD), SentiWordNet [<xref ref-type="bibr" rid="ref19">19</xref>], to compare RadLex coverage with a general dictionary. SentiWordNet, which is a GPD for sentiment analysis in the context of social network services, provides a negative or positive score of terms. The number of words of parts of speech (POS) is 117,659, including 82,115 distinct nouns, 13,767 verbs, 18,156 adjectives, and 3621 adverbs. The number of compound terms is 48,469.</p>
      </sec>
      <sec>
        <title>Clinical Text Analysis and Knowledge Extraction System</title>
        <p>The clinical Text Analysis and Knowledge Extraction System (cTAKES), which is an NLP system for extraction of information from EMR clinical free text, contains an automatic NER tool using a dictionary lookup mechanism [<xref ref-type="bibr" rid="ref20">20</xref>]. The default dictionary of cTAKES is based on the Unified Medical Language System (UMLS) [<xref ref-type="bibr" rid="ref21">21</xref>] and provides annotations of diseases or disorders, signs or symptoms, anatomical sites, procedures, and medications. For example, the dictionaries based on SNOMED-CT and RxNORM, which are part of the UMLS, cover the fields of general clinical findings and medications. We investigated the compound terms in each dictionary for the analysis.</p>
      </sec>
      <sec>
        <title>Medical Information Mart for Intensive Care-III</title>
        <p>The Medical Information Mart for Intensive Care-III (MIMIC-III) is a free, open database provided by the Massachusetts Institute of Technology Laboratory for Computational Physiology, which includes approximately 60,000 deidentified admissions of patients at the Beth Israel Deaconess Medical Center from 2001 to 2012 [<xref ref-type="bibr" rid="ref22">22</xref>]. Using PostgreSQL, we queried the note events table of the MIMIC-III database, which includes approximately 520,000 radiology reports.</p>
      </sec>
      <sec>
        <title>Procedures</title>
        <p>The overall goal of our study is to clarify the coverage of RadLex-based dictionaries with compound terms and to construct and evaluate the NER tools that use the RadLex-based dictionaries for mining free-text radiology reports. First, we customized cTAKES to build the RadLex and GPD dictionaries. As previously mentioned, the default dictionaries of cTAKES provided by the UMLS are SNOMED-CT and RxNORM. Second, we combined these three dictionaries in the following patterns: Default, Default+RadLex, and Default+RadLex+GPD. Third, we removed single terms from each dictionary and evaluated their performance. Finally, we carried out the three processes of analysis (step 1 to step 3) to obtain profiles of the stem terms for improving the performance of NER (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of methods. cTAKES: clinical Text Analysis and Knowledge Extraction System; CtED: compound terms–enhanced dictionary; FN: false negative; NPI: noun phrase identification; TP: true positive.</p>
          </caption>
          <graphic xlink:href="jmir_v23i10e25378_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Creating Annotation Corpus of Radiology Reports</title>
        <p>We randomly selected 400 reports of computed tomography (CT), magnetic resonance imaging (MRI), positron emission computed tomography (PET), and radiography (x-ray) from the MIMIC-III database (100 reports for each imaging modality type). These reports were in a free-text format and were categorized into sections; we used the <italic>Findings</italic>, <italic>Interpretations</italic>, and <italic>Impressions</italic> sections, which play a core role in diagnosis. There were 28.9 sentences per report and 179.1 tokens per report. An additional 100 reports (25 reports for each imaging modality type) were randomly selected and used in the validation study for compound terms.</p>
        <p>We first conducted stop word removal and converted all characters to lowercase. Next, we leveraged the AggregatePlaintextProcessor of cTAKES to identify noun phrases in the radiology reports so that we could perform a manual annotation for noun phrases. Next, we applied manual reviews to annotate compound terms. The compound terms were also tagged with all conceivable patterns based on the <italic>stem term</italic>. For example, the compound term <italic>right upper lobe</italic> is divided into <italic>right upper lobe</italic> and <italic>upper lobe</italic>. After the annotation, we can obtain two compound terms from <italic>right upper lobe</italic>. In this case, we defined <italic>lobe</italic> as a stem term. We also separated <italic>right upper lobe of lung base</italic> into <italic>right upper lobe</italic> and <italic>lung base</italic>. Thus, we defined the stem term as the modified term of a compound term in this study. These manual annotations were conducted and agreed on by 3 researchers with a background in radiology (n=1) and computer sciences (n=2). Generally, the annotations for compound terms are performed by expert radiologists and are agreed upon through discussion. As some studies have revealed that the annotation patterns of compound terms differ among institutions, we used all the patterns of compound terms as the gold standard.</p>
      </sec>
      <sec>
        <title>Developing a RadLex-Based NER Tool</title>
        <p>First, we created a customized NER tool using cTAKES, which uses a dictionary lookup–based parser for NER. It extracts terms that can be looked up in the installed dictionary. Some previous studies have attempted to create customized dictionaries (eg, UMLS) [<xref ref-type="bibr" rid="ref11">11</xref>] for NER, but few studies have investigated NER using RadLex for mining radiology reports [<xref ref-type="bibr" rid="ref23">23</xref>]. In this study, we built a customized dictionary using RadLex as a domain-specific dictionary and SentiWordNet as a GPD. RadLex can be used to automatically extract technical terms from radiology reports [<xref ref-type="bibr" rid="ref24">24</xref>], whereas SentiWordNet is usually used for sentiment analysis, which clarifies positive or negative descriptive text on social networking services. Moreover, we created dictionaries for compound terms. The terms of each dictionary were stored in the bar-separated value (BSV) file and located in the dictionary lookup–first directory, which allows the term to be extracted preferentially. cTAKES uses a SNOMED-CT and RxNORM dictionary by default. Finally, we created a collection of customized dictionaries in the following patterns: Default (SNOMED-CT and RxNORM), Default+RadLex, and Default+RadLex+GPD.</p>
      </sec>
      <sec>
        <title>Step 1: Performance Evaluation of Each Pipeline</title>
        <p>For each customized pipeline, we evaluated the performance of four different sets of the three dictionary patterns using standard measures (ie, precision, recall, and F-measure). The formulas for the measures are as follows:</p>
        <disp-formula>
        Precision = True positives / (True positives + False positives) × 100      <bold>(1)</bold></disp-formula>
        <disp-formula> Recall = True positives / (True positives + False negatives) × 100      <bold>(2)</bold></disp-formula>
        <disp-formula> F-measure = 2 × Precision × Recall / (Precision + Recall)      <bold>(3)</bold>
        </disp-formula>
        <p>Here, true positive (TP) is defined as the number of manual annotations matched with the dictionary phrases, false positive (FP) is defined as the number of dictionary phrases matched with entities other than manual annotations, and false negative (FN) is defined as the number of annotations not matched with the dictionary phrases. We also evaluated the performance for each of the four major imaging modalities: CT, MRI, x-ray, and PET. GATE (General Architecture for Text Engineering) developer version 8.4.1 [<xref ref-type="bibr" rid="ref25">25</xref>] was used to compute these measures.</p>
      </sec>
      <sec>
        <title>Step 2: Creating and Evaluating a Compound Terms–Enhanced Dictionary</title>
        <p>We also created a compound terms–enhanced dictionary (CtED) to improve performance (<xref rid="figure1" ref-type="fig">Figure 1</xref>). We added the compound terms in the FN category (as identified in the initial evaluation) to the custom dictionaries that were used for parsing the 400 radiology reports. At the same time, we removed the compound terms in the FP category from these dictionaries. To validate the performance of the CtED, we carried out NER for another 100 radiology reports (25 reports for each imaging modality type; <xref rid="figure1" ref-type="fig">Figure 1</xref>). Finally, we calculated the precision, recall, and F-measure for the performance evaluation.</p>
      </sec>
      <sec>
        <title>Step 3: Stem Term Analysis for Expanding Dictionary</title>
        <p>To obtain the full benefit of using RadLex, which is an ontology-based tool, we created 2 measures for a stem term. We first defined a measure called the occurrence ratio (OR) to determine the frequency of stem terms in TPs and FNs from step 2. The OR provides a measure for prioritizing which compound terms, grouped by stem term, should be added to RadLex. For example, if the value of the OR for a stem term in TPs is high, it means that the number of compound terms (containing the stem term) that are correctly identified by the pipeline is high. In contrast, if the value of the OR for the stem term in FNs is high, it means that the number of compound terms (containing the stem term) that are missed by the pipeline is high. Moreover, if a stem term has a high OR in both TPs and FNs, we can hypothesize that there is a high demand for extracting entities with this stem from the reports but that the dictionaries still lack compound terms having the stem. In short, the OR can visualize a profile of the demand and supply of stem term–oriented compound terms in the corpus.</p>
        <disp-formula>Occurrence ratio (%) = Occurrence of a stem term in TP or FN / Total number of stem terms in TPs or FNs × 100%      <bold>(4)</bold></disp-formula>
        <p>Second, we defined a measure called the matching ratio (MR) to describe the distribution of stem terms in FNs that are matched with the dictionaries. The MR (%) was calculated using the formula presented below. The MR can guide the basic concept of the RadLex or SNOMED-CT (cTAKES default dictionary) concept that matches the stem terms. For example, if a stem term of <italic>effusion</italic> is found in RadLex, we continue to trace the parent concept until the concept is under the top hierarchy. Finally, we identified the concept of <italic>clinical findings</italic>. The MR provides the criteria for identifying the number of concepts. We used 15 concepts under the RadLex entity (ie, <italic>anatomical entity</italic>, <italic>clinical finding</italic>, <italic>imaging modality</italic>, <italic>imaging observation</italic>, <italic>nonanatomical substance</italic>, <italic>object</italic>, <italic>procedure</italic>, <italic>process step</italic>, <italic>process</italic>, <italic>property</italic>, <italic>RadLex descriptor</italic>, <italic>RadLex nonanatomical set</italic>, <italic>report</italic>, <italic>report content</italic>, and <italic>temporary entity</italic>). Each stem term was tracked using its upper-class ID (RadLex ID). For the cTAKES default dictionary, we used 19 concepts under the top class of SNOMED-CT (RxNORM was excluded because it does not have a hierarchical structure). 
The classes are <italic>Body structure</italic>, <italic>Clinical finding</italic>, <italic>Environment or geographical location</italic>, <italic>Event</italic>, <italic>Observable entity</italic>, <italic>Organism</italic>, <italic>Pharmaceutical/biologic product</italic>, <italic>Physical force</italic>, <italic>Physical object</italic>, <italic>Procedure</italic>, <italic>Qualifier value</italic>, <italic>Record artifact</italic>, <italic>Situation with explicit context</italic>, <italic>SNOMED-CT Model Component</italic>, <italic>Social context</italic>, <italic>Special concept</italic>, <italic>Specimen</italic>, <italic>Staging and scales</italic>, and <italic>Substance</italic>. We manually checked all stem terms based on the criteria of the exact match through the BioPortal site (National Center for Biomedical Ontology) [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        <disp-formula>Matching ratio (%) = Occurrence of a stem term in FN matched with RadLex or SNOMED-CT / Total number of stem terms in FNs × 100%      <bold>(5)</bold></disp-formula>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Performance Evaluation of Each Pipeline</title>
        <p>The F-measure of the pipeline with the dictionaries Default+RadLex+GPD for compound terms was nearly the same as that of the pipeline with the dictionaries Default+RadLex (31.5% vs 31.4%; <xref ref-type="table" rid="table1">Table 1</xref>). In step 2—building and evaluating the CtED—the F-measures of the pipeline with the dictionaries Default+RadLex+GPD with and without the CtED were 63.1% and 30.9%, respectively (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>F-measure, precision, and recall of each dictionary (step 1: number of reports=400).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Dictionaries</td>
                <td>F-measure, %</td>
                <td>Precision, %</td>
                <td>Recall, %</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Default</td>
                <td>27.9</td>
                <td>93.4</td>
                <td>16.4</td>
              </tr>
              <tr valign="top">
                <td>Default+RadLex</td>
                <td>31.4</td>
                <td>94.9</td>
                <td>18.8</td>
              </tr>
              <tr valign="top">
                <td>Default+RadLex+GPD<sup>a</sup></td>
                <td>31.5</td>
                <td>93.2</td>
                <td>19</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>GPD: general purpose dictionary.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>F-measure, precision, and recall of each dictionary (step 2: number of reports=100).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="550"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Dictionaries</td>
                <td>F-measure, %</td>
                <td>Precision, %</td>
                <td>Recall, %</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Default+RadLex+GPD<sup>a</sup> without enhancement</td>
                <td>30.9</td>
                <td>73.3</td>
                <td>19.6</td>
              </tr>
              <tr valign="top">
                <td>Default+RadLex+GPD with enhancement</td>
                <td>63.1</td>
                <td>82.8</td>
                <td>51</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>GPD: general purpose dictionary.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Regarding each imaging modality (<xref ref-type="table" rid="table3">Table 3</xref>), the F-measure of cTAKES+RadLex+GPD+CtED for x-ray was higher (64.3%) than that without enhancement (26.7%). The most frequent stem terms in the FNs were <italic>effusion</italic> (9.1% x-ray), <italic>change</italic> (3.5% CT), <italic>change</italic> (4.7% MRI), and <italic>uptake</italic> (12% PET; <xref ref-type="table" rid="table4">Table 4</xref>). The number of words in the compound terms in the FNs was mainly 2 (31,774/42,871, 74.12%), 3 (7876/42,871, 18.37%), and 4 (2271/42,871, 5.29%), which together account for approximately 97.78% (41,921/42,871) of all FNs.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>F-measure of the compound terms–enhanced dictionary of each modality.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="300"/>
            <col width="350"/>
            <thead>
              <tr valign="top">
                <td>Modality</td>
                <td>cTAKES<sup>a</sup>+RadLex+GPD<sup>b</sup> (%)</td>
                <td>cTAKES+RadLex+GPD+CtED<sup>c</sup> (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Computed tomography</td>
                <td>33.5</td>
                <td>62.4</td>
              </tr>
              <tr valign="top">
                <td>MRI<sup>d</sup></td>
                <td>30.7</td>
                <td>63.6</td>
              </tr>
              <tr valign="top">
                <td>PET<sup>e</sup></td>
                <td>30.3</td>
                <td>63.4</td>
              </tr>
              <tr valign="top">
                <td>x-ray</td>
                <td>26.7</td>
                <td>64.3</td>
              </tr>
              <tr valign="top">
                <td>All</td>
                <td>30.9</td>
                <td>63.1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>cTAKES: clinical Text Analysis and Knowledge Extraction System.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>GPD: general purpose dictionary.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>CtED: compound terms–enhanced dictionary.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>MRI: magnetic resonance imaging.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>PET: positron emission computed tomography.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Top five occurrence ratios in each imaging modality.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="0"/>
            <col width="400"/>
            <col width="0"/>
            <col width="350"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Modality</td>
                <td colspan="2">Stem</td>
                <td>OR<sup>a</sup>, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Computed tomography</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>TP<sup>b</sup> (n=1127)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">lobe</td>
                <td colspan="2">100 (8.87)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">effusion</td>
                <td colspan="2">59 (5.24)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">node</td>
                <td colspan="2">50 (4.44)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">artery</td>
                <td colspan="2">39 (3.46)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">hemorrhage</td>
                <td colspan="2">37 (3.28)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>FN<sup>c</sup> (n=3532)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">change</td>
                <td colspan="2">125 (3.54)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">collection</td>
                <td colspan="2">98 (2.77)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">lesion</td>
                <td colspan="2">95 (2.69)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">effusion</td>
                <td colspan="2">94 (2.66)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">evidence</td>
                <td colspan="2">69 (1.95)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>MRI<sup>d</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>TP (n=840)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">artery</td>
                <td colspan="2">146 (17.38)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">lobe</td>
                <td colspan="2">49 (5.83)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">sinus</td>
                <td colspan="2">29 (3.45)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">matter</td>
                <td colspan="2">20 (2.38)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">body</td>
                <td colspan="2">20 (2.38)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>FN (n=3732)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">change</td>
                <td colspan="2">176 (4.72)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">lesion</td>
                <td colspan="2">144 (3.86)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">enhancement</td>
                <td colspan="2">132 (3.54)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">evidence</td>
                <td colspan="2">95 (2.55)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">study</td>
                <td colspan="2">89 (2.38)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>PET<sup>e</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>TP (n=1123)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">node</td>
                <td colspan="2">192 (17.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">lobe</td>
                <td colspan="2">102 (9.08)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">gland</td>
                <td colspan="2">69 (6.14)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">nodule</td>
                <td colspan="2">39 (3.47)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">disease</td>
                <td colspan="2">36 (3.21)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>FN (n=4708)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">uptake</td>
                <td colspan="2">567 (12.04)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">node</td>
                <td colspan="2">250 (5.31)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">lesion</td>
                <td colspan="2">180 (3.82)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">avidity</td>
                <td colspan="2">169 (3.59)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">disease</td>
                <td colspan="2">157 (3.33)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>x-ray</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>TP (n=323)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">effusion</td>
                <td colspan="2">46 (14.24)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">tube</td>
                <td colspan="2">37 (11.45)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">lobe</td>
                <td colspan="2">27 (8.36)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">edema</td>
                <td colspan="2">18 (5.57)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">lung</td>
                <td colspan="2">17 (5.26)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>FN (n=1279)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">effusion</td>
                <td colspan="2">117 (9.15)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">tube</td>
                <td colspan="2">69 (5.39)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">opacity</td>
                <td colspan="2">67 (5.24)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">pneumothorax</td>
                <td colspan="2">62 (4.85)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td colspan="2">line</td>
                <td colspan="2">57 (4.46)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>OR: occurrence ratio.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>TP: true positive.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>FN: false negative.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>MRI: magnetic resonance imaging.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>PET: positron emission computed tomography.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In addition, the most frequent FPs that were removed from the cTAKES+RadLex+GPD dictionaries were <italic>related to</italic> (34/239, 14.2%), <italic>abdomen and pelvis</italic> (23/239, 9.6%), and <italic>head and neck</italic> (21/239, 8.8%).</p>
      </sec>
      <sec>
        <title>Most Frequent Stem Terms</title>
        <p>The ORs of the TPs and FNs in each imaging modality (step 3) are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. The stem terms of the TPs in the CT reports were more diverse than those in the MRI, PET, and x-ray reports. The FNs in the CT and MRI reports also showed the same trends. The most frequent stem terms in the TPs were <italic>lobe</italic> (100/1127, 8.87% CT), <italic>artery</italic> (146/840, 17.4% MRI), <italic>node</italic> (192/1123, 17.1% PET), and <italic>effusion</italic> (46/323, 14.2% x-ray; <xref ref-type="table" rid="table4">Table 4</xref>). In contrast, the most frequent stem terms in the FNs were <italic>change</italic> (125/3532, 3.54% CT), <italic>change</italic> (176/3732, 4.72% MRI), <italic>uptake</italic> (567/4708, 12.04% PET), and <italic>effusion</italic> (117/1279, 9.15% x-ray). <xref ref-type="table" rid="table4">Table 4</xref> shows that stem terms such as <italic>effusion</italic>, <italic>node</italic>, <italic>tube</italic>, and <italic>disease</italic> had high ORs in both TPs and FNs.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Occurrence ratio of true positives and false negatives in each imaging modality. CT: computed tomography; FN: false negative; MRI: magnetic resonance imaging; PET: positron emission computed tomography; TP: true positive.</p>
          </caption>
          <graphic xlink:href="jmir_v23i10e25378_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref ref-type="table" rid="table5">Table 5</xref> illustrates the distribution of the stem terms in FNs that are matched with a RadLex upper concept using the MR. The result of the MR was 71.85% (9411/13,098), which included a connectivity of 47.4% with RadLex and 51.5% with the cTAKES default dictionary (SNOMED-CT and RxNORM). The stem terms that did not match RadLex and the cTAKES default dictionary accounted for 28.15% (3687/13,098). The matched classes in RadLex included clinical finding (1839/13,098, 14.04%), imaging observation (1508/13,098, 11.51%), and process (1000/13,098, 7.63%), and those in the cTAKES default dictionary included Body structure (1428/13,098, 10.9%), Over two categories (1265/13,098, 9.66%), and Qualifier value (935/13,098, 7.14%).</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Classification of stem terms in false negatives based on cTAKES<sup>a</sup>, RadLex, and combined dictionary (n=13,098).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="600"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Stem terms</td>
                <td>Class</td>
                <td>Proportion, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>cTAKES default (SNOMED-CT<sup>b</sup>)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>N/A<sup>c</sup></td>
                <td>6349 (48.47)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Body structure</td>
                <td>1428 (10.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Over two categories</td>
                <td>1265 (9.66)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Qualifier value</td>
                <td>935 (7.14)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Clinical finding</td>
                <td>878 (6.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SNOMED-CT model component</td>
                <td>723 (5.52)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Procedure</td>
                <td>721 (5.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Environment or geographical location</td>
                <td>217 (1.66)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Physical object</td>
                <td>206 (1.57)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Substance</td>
                <td>143 (1.09)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td>233 (1.78)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>RadLex</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>N/A</td>
                <td>6893 (52.63)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Clinical finding</td>
                <td>1839 (14.04)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Imaging observation</td>
                <td>1508 (11.51)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Process</td>
                <td>1000 (7.63)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Anatomical entity</td>
                <td>997 (7.61)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Property</td>
                <td>295 (2.25)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RadLex descriptor</td>
                <td>248 (1.89)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Object</td>
                <td>210 (1.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Procedure</td>
                <td>91 (0.69)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Imaging modality</td>
                <td>11 (0.08)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Nonanatomical substance</td>
                <td>5 (0.04)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Report component</td>
                <td>1 (0.01)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>cTAKES default (SNOMED-CT)+RadLex</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>cTAKES+RadLex</td>
                <td>9411 (71.85)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>N/A</td>
                <td>3687 (28.15)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>cTAKES: clinical Text Analysis and Knowledge Extraction System.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>SNOMED-CT: Systematized Nomenclature of Medicine-Clinical Terms.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Overview</title>
        <p>In this study, we first constructed RadLex-based NER tools for mining free-text radiology reports and evaluated the coverage of the pipelines (step 1). Second, we built a CtED extracted from the FNs of step 1 to improve performance (step 2). Third, we defined OR and MR to consider the potential of expanding the dictionary using RadLex ontology (step 3).</p>
      </sec>
      <sec>
        <title>Performance Evaluation of Each Pipeline (Step 1 and Step 2)</title>
        <p>First, the F-measure of cTAKES+RadLex+GPD was 30.9% (precision 73.3% and recall 19.6%) on its own and 63.1% (precision 82.8% and recall 51%) with the CtED. The CtED for compound terms increased the F-measure by 32.2 percentage points, but the F-measure was not obviously changed by the GPD (31.4% vs 31.5%). This indicated that the GPD, unlike for single words, did not cover the compound terms specific to radiology reports. The merit of using RadLex is that we can use the standard vocabularies and relationships such as <italic>Is-A</italic> and <italic>May_cause</italic>. RadLex provides 15 concepts under the top entity, which can assign labels such as <italic>anatomical entity</italic> and <italic>clinical finding</italic> to each entity.</p>
        <p>Our tool using cTAKES was able to customize dictionaries by creating a BSV file, which provides a convenient way to leverage those vocabulary resources that are not covered by the default dictionary. In addition, the BSV file stores IDs that can be used to track the parent concepts for a particular term, which enables the classification or profiling of extracted terms using high-level concept classes defined in a vocabulary.</p>
      </sec>
      <sec>
        <title>Stem Term Analysis for Expanding Dictionary (Step 3)</title>
        <p>The OR provides profiles of <italic>demand and supply</italic> for stem terms in the corpus. For example, the stem terms of <italic>disease</italic> (PET), <italic>node</italic> (PET), <italic>effusion</italic> (x-ray), and <italic>tube</italic> (x-ray) had a high OR value in both TPs and FNs (<xref ref-type="table" rid="table4">Table 4</xref>). This means that creating compound terms with high OR–value stem terms in FNs potentially improves precision for capturing entities in each modality’s reports compared with the effort of applying the other vast vocabularies in the pipeline. In addition, the features of the FNs also showed that 97.78% (41,921/42,871) of the compound terms consisted of 2-4 words. This fact suggests that NER performance can be effectively improved by identifying <italic>1 to 3 modified words</italic> and <italic>stem term from each imaging modality.</italic> With regard to the MR, RadLex improved 20.33% of the connectivity with stem terms in the FNs compared with the cTAKES default (SNOMED-CT). The contribution of the improvement can provide criteria in terms of whether we should add phrases to RadLex or to SNOMED-CT. Therefore, stem term–related information such as OR and MR would contribute to expanding dictionaries that have ontological structures. This kind of dictionary-based NER would provide ontology-based benefits such as reasoning concepts and using standard codes and vocabularies. Although it is known that CRFs achieve a higher F-measure than dictionary-based approaches, CRFs generate entities that have no hierarchical structure and relationships.</p>
        <p>In contrast, our approach is based on an ontology, which enables interoperable processing and data mining of reports. For example, when we identify the term <italic>pleural effusion</italic>, RadLex ontology can guide us to the parent class <italic>effusion</italic> so that we can finally reach the <italic>Clinical findings</italic> class by tracking upper concepts. RadLex can also provide relationships such as <italic>pleural effusion may cause vascular cut-off sign</italic>.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The limitation of this study is that our pipeline is optimized for identifying short compound terms because we divided compound terms using stop words such as <italic>and</italic>. For example, we set the stop word <italic>and</italic> so that the compound term <italic>abdomen and neck</italic> is separated into <italic>abdomen</italic> and <italic>neck</italic>. This approach has the merit of identifying stem terms as far as possible, for example, by splitting the long phrase <italic>right pleural effusion and left lung pneumothorax</italic> into <italic>right pleural effusion</italic> and <italic>left lung pneumothorax</italic>. Therefore, in the case of capturing long compound terms, we need to combine short phrases. Generally, noun phrase identification for free-text radiology reports is considered difficult because there are many variants of long compound terms. We believe that our method has the potential to capture long compound terms when applying a combination of single and short compound terms.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>The annotation tool GATE that we used can identify a partial match with TPs, which means that the types of NER are the same, but the span is not the same. In this study, such partial positives were treated as FNs. We reviewed these uncertain negatives based on the rule of the stem words and found that 35.4% (90/254) of the partial positives had the potential to change into TPs. This was equivalent to 0.7% of the increased F-measure (cTAKES+RadLex+GPD+CtED). The details of the partial match require further analysis.</p>
        <p>The study by Jiang et al [<xref ref-type="bibr" rid="ref27">27</xref>] demonstrated a state-of-the-art text-mining tool of the Stanford Parser. The study’s results showed that POS-based grammatical approaches are efficient in capturing named entities in free-text radiology reports. In future work, we will extract the POS information to define a pattern of the modified words of the compound term.</p>
        <p>Lately, Word2Vec technology has been explored for generating synonyms and expanding the radiology-specific dictionary [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. These studies claimed that a machine learning technology such as Word2Vec supports the building of enhanced dictionaries and reduces the annotation cost. We agree with this claim and believe that it is useful to use Word2Vec to calculate vectors of single terms in the noun phrase, creating modifiers for each stem term. In future work, we will generate modified words using this type of machine learning approach. The customized text-mining tool combined with machine learning technology can help further extract features from radiology reports.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we developed a customized NER tool based on RadLex for the recognition of technical terms. We demonstrated that the CtED and stem term analysis have the potential to improve the performance of the dictionary-based NER with regard to expanding vocabularies.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BSV</term>
          <def>
            <p>bar-separated value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CT</term>
          <def>
            <p>computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">cTAKES</term>
          <def>
            <p>clinical Text Analysis and Knowledge Extraction System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CtED</term>
          <def>
            <p>compound terms–enhanced dictionary</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">GATE</term>
          <def>
            <p>General Architecture for Text Engineering</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">GPD</term>
          <def>
            <p>general purpose dictionary</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MR</term>
          <def>
            <p>matching ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">MRI</term>
          <def>
            <p>magnetic resonance imaging</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">OR</term>
          <def>
            <p>occurrence ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">PET</term>
          <def>
            <p>positron emission computed tomography</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">POS</term>
          <def>
            <p>parts of speech</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">SNOMED-CT</term>
          <def>
            <p>Systematized Nomenclature of Medicine-Clinical Terms</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nishimoto</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Terae</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Uesugi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ogasawara</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sakurai</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Development of a medical-text parsing algorithm based on character adjacent probability distribution for Japanese radiology reports</article-title>
          <source>Methods Inf Med</source>
          <year>2008</year>
          <volume>47</volume>
          <issue>6</issue>
          <fpage>513</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.3414/me9127</pub-id>
          <pub-id pub-id-type="medline">19057808</pub-id>
          <pub-id pub-id-type="pii">08060513</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pool</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Goergen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Quality of the written radiology report: a review of the literature</article-title>
          <source>J Am Coll Radiol</source>
          <year>2010</year>
          <month>08</month>
          <volume>7</volume>
          <issue>8</issue>
          <fpage>634</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jacr.2010.03.016</pub-id>
          <pub-id pub-id-type="medline">20678733</pub-id>
          <pub-id pub-id-type="pii">S1546-1440(10)00157-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Omary</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Bettmann</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Cardella</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Bakal</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartzberg</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Sacks</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rholl</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Meranze</surname>
              <given-names>SG</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>CA</given-names>
            </name>
            <collab>Society of Interventional Radiology Standards of Practice Committee</collab>
          </person-group>
          <article-title>Quality improvement guidelines for the reporting and archiving of interventional radiology procedures</article-title>
          <source>J Vasc Interv Radiol</source>
          <year>2003</year>
          <month>09</month>
          <volume>14</volume>
          <issue>9 Pt 2</issue>
          <fpage>293</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1097/01.rvi.0000094601.83406.e1</pub-id>
          <pub-id pub-id-type="medline">14514836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <article-title>International statistical classification of diseases and related health problems (ICD)</article-title>
          <source>World Health Organization</source>
          <access-date>2021-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/classifications/classification-of-diseases">https://www.who.int/classifications/classification-of-diseases</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Markwell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sato</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cheetham</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Representing clinical information using SNOMED Clinical Terms with different structural information models</article-title>
          <source>Proceedings of the 3rd international conference on Knowledge Representation in Medicine (KR-MED 2008)</source>
          <year>2008</year>
          <conf-name>3rd international conference on Knowledge Representation in Medicine (KR-MED 2008)</conf-name>
          <conf-date>May 31st - June 2nd, 2008</conf-date>
          <conf-loc>Phoenix, Arizona, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.semanticscholar.org/paper/Representing-Clinical-Information-using-SNOMED-with-Markwell-Sato/756ae1e77f06c4a93a7b16a342e374424c1cc76c#paper-header"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cotik</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Filippo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Castaño</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>An approach for automatic classification of radiology reports in Spanish</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2015</year>
          <volume>216</volume>
          <fpage>634</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="medline">26262128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kahn Jr</surname>
              <given-names>CE</given-names>
            </name>
          </person-group>
          <article-title>Content analysis of reporting templates and free-text radiology reports</article-title>
          <source>J Digit Imaging</source>
          <year>2013</year>
          <month>10</month>
          <volume>26</volume>
          <issue>5</issue>
          <fpage>843</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23553231"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10278-013-9597-4</pub-id>
          <pub-id pub-id-type="medline">23553231</pub-id>
          <pub-id pub-id-type="pmcid">PMC3782601</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
          </person-group>
          <article-title>RadLex: a new method for indexing online educational materials</article-title>
          <source>Radiographics</source>
          <year>2006</year>
          <volume>26</volume>
          <issue>6</issue>
          <fpage>1595</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1148/rg.266065168</pub-id>
          <pub-id pub-id-type="medline">17102038</pub-id>
          <pub-id pub-id-type="pii">26/6/1595</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oberkampf</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zillner</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Overton</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cavallaro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Uder</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hammon</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Semantic representation of reported measurements in radiology</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2016</year>
          <month>01</month>
          <day>22</day>
          <volume>16</volume>
          <fpage>5</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-016-0248-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-016-0248-9</pub-id>
          <pub-id pub-id-type="medline">26801764</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-016-0248-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC4722630</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Segrelles</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Medina</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Blanquer</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Martí-Bonmatí</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Increasing the efficiency on producing radiology reports for breast cancer diagnosis by means of structured reports. A comparative study</article-title>
          <source>Methods Inf Med</source>
          <year>2017</year>
          <month>05</month>
          <day>18</day>
          <volume>56</volume>
          <issue>3</issue>
          <fpage>248</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.3414/ME16-01-0091</pub-id>
          <pub-id pub-id-type="medline">28220929</pub-id>
          <pub-id pub-id-type="pii">16-01-0091</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lacson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ip</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Valtchinov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Raja</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Osterbur</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Khorasani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Evaluating terminologies to enable imaging-related decision rule sharing</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2017</year>
          <month>2</month>
          <day>10</day>
          <volume>2016</volume>
          <fpage>2082</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28269968"/>
          </comment>
          <pub-id pub-id-type="medline">28269968</pub-id>
          <pub-id pub-id-type="pmcid">PMC5333322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Vyas</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Toland</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Vreeman</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Abhyankar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
          </person-group>
          <article-title>Use of radiology procedure codes in health care: the need for standardization and structure</article-title>
          <source>Radiographics</source>
          <year>2017</year>
          <volume>37</volume>
          <issue>4</issue>
          <fpage>1099</fpage>
          <lpage>110</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28696857"/>
          </comment>
          <pub-id pub-id-type="doi">10.1148/rg.2017160188</pub-id>
          <pub-id pub-id-type="medline">28696857</pub-id>
          <pub-id pub-id-type="pmcid">PMC5548452</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kundeti</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vijayananda</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mujjiga</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kalyan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Clinical named entity recognition: challenges and opportunities</article-title>
          <source>Proceedings of the IEEE International Conference on Big Data (Big Data)</source>
          <year>2016</year>
          <conf-name>2016 IEEE International Conference on Big Data (Big Data)</conf-name>
          <conf-date>Dec 5-8, 2016</conf-date>
          <conf-loc>Washington, DC, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/bigdata.2016.7840814</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Névéol</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lavergne</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yasunaga</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Clément</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Morello</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Burgun</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of radiology reports for the detection of thromboembolic diseases and clinically relevant incidental findings</article-title>
          <source>BMC Bioinformatics</source>
          <year>2014</year>
          <month>08</month>
          <day>07</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>266</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-266"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-15-266</pub-id>
          <pub-id pub-id-type="medline">25099227</pub-id>
          <pub-id pub-id-type="pii">1471-2105-15-266</pub-id>
          <pub-id pub-id-type="pmcid">PMC4133634</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassanpour</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
          </person-group>
          <article-title>Information extraction from multi-institutional radiology reports</article-title>
          <source>Artif Intell Med</source>
          <year>2016</year>
          <month>01</month>
          <volume>66</volume>
          <fpage>29</fpage>
          <lpage>39</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26481140"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2015.09.007</pub-id>
          <pub-id pub-id-type="medline">26481140</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(15)00124-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC5221793</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasić</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Button</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>KneeTex: an ontology-driven system for information extraction from MRI reports</article-title>
          <source>J Biomed Semantics</source>
          <year>2015</year>
          <month>9</month>
          <day>7</day>
          <volume>6</volume>
          <fpage>34</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-015-0033-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13326-015-0033-1</pub-id>
          <pub-id pub-id-type="medline">26347806</pub-id>
          <pub-id pub-id-type="pii">33</pub-id>
          <pub-id pub-id-type="pmcid">PMC4561435</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mowery</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Madden</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Keyhani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
          </person-group>
          <article-title>Extracting a stroke phenotype risk factor from Veteran Health Administration clinical reports: an information content analysis</article-title>
          <source>J Biomed Semantics</source>
          <year>2016</year>
          <month>5</month>
          <day>10</day>
          <volume>7</volume>
          <fpage>26</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-016-0065-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13326-016-0065-1</pub-id>
          <pub-id pub-id-type="medline">27175226</pub-id>
          <pub-id pub-id-type="pii">65</pub-id>
          <pub-id pub-id-type="pmcid">PMC4863379</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <article-title>Information about RadLex</article-title>
          <source>RSNA Informatics</source>
          <access-date>2021-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://radlex.org/">http://radlex.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <article-title>aesuli/SentiWordNet</article-title>
          <source>GitHub</source>
          <access-date>2021-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/aesuli/sentiwordnet">https://github.com/aesuli/sentiwordnet</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>History</article-title>
          <source>cTAKES</source>
          <access-date>2021-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://ctakes.apache.org/history.html">http://ctakes.apache.org/history.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ogren</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>507</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20819853"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
          <pub-id pub-id-type="medline">20819853</pub-id>
          <pub-id pub-id-type="pii">17/5/507</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <month>05</month>
          <day>24</day>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/sdata.2016.35"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hazen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Van Esbroeck</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Mongkolwat</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Channin</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>Automatic extraction of concepts to extend RadLex</article-title>
          <source>J Digit Imaging</source>
          <year>2011</year>
          <month>02</month>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>165</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20838847"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10278-010-9334-1</pub-id>
          <pub-id pub-id-type="medline">20838847</pub-id>
          <pub-id pub-id-type="pmcid">PMC3046782</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beyer</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>McKee</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Regis</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>McKee</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Flacke</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>El Saadawi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wald</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Automatic Lung-RADS™ classification with a natural language processing system</article-title>
          <source>J Thorac Dis</source>
          <year>2017</year>
          <month>09</month>
          <volume>9</volume>
          <issue>9</issue>
          <fpage>3114</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.21037/jtd.2017.08.13"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/jtd.2017.08.13</pub-id>
          <pub-id pub-id-type="medline">29221286</pub-id>
          <pub-id pub-id-type="pii">jtd-09-09-3114</pub-id>
          <pub-id pub-id-type="pmcid">PMC5708435</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <article-title>Homepage</article-title>
          <source>GATE</source>
          <access-date>2021-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://gate.ac.uk/">https://gate.ac.uk/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <article-title>Welcome to BioPortal</article-title>
          <source>BioPortal</source>
          <access-date>2021-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bioportal.bioontology.org/">https://bioportal.bioontology.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Parsing clinical text: how good are the state-of-the-art parsers?</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2015</year>
          <volume>15</volume>
          <issue>Suppl 1</issue>
          <fpage>S2</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-15-S1-S2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1472-6947-15-S1-S2</pub-id>
          <pub-id pub-id-type="medline">26045009</pub-id>
          <pub-id pub-id-type="pii">1472-6947-15-S1-S2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4460747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maldonado</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Goodwin</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Skinner</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Harabagiu</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Deep learning meets biomedical ontologies: knowledge embeddings for epilepsy</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2018</year>
          <month>04</month>
          <day>16</day>
          <volume>2017</volume>
          <fpage>1233</fpage>
          <lpage>42</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29854192"/>
          </comment>
          <pub-id pub-id-type="medline">29854192</pub-id>
          <pub-id pub-id-type="pmcid">PMC5977726</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <article-title>Extraction of medical knowledge from clinical reports and chest x-rays using machine learning techniques</article-title>
          <source>University of Alicante</source>
          <access-date>2021-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://rua.ua.es/dspace/handle/10045/102193">http://rua.ua.es/dspace/handle/10045/102193</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
