<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v24i11e40361</article-id>
      <article-id pub-id-type="pmid">36427233</article-id>
      <article-id pub-id-type="doi">10.2196/40361</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>The Generation of a Lung Cancer Health Factor Distribution Using Patient Graphs Constructed From Electronic Medical Records: Retrospective Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wang</surname>
            <given-names>Zixing</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mircheva</surname>
            <given-names>Iskra</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Anjun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4209-8301</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Ran</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6421-3361</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Erman</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2363-3738</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Han</surname>
            <given-names>Ruobing</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5932-9829</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wen</surname>
            <given-names>Jian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6370-3964</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Qinghua</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4547-8513</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Zhiyong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3112-3807</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>Bairong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Institutes for System Genetics</institution>
            <institution>West China Hospital</institution>
            <addr-line>2222 Xingchuan Road</addr-line>
            <addr-line>Chengdu, 610212</addr-line>
            <country>China</country>
            <phone>86 15995854635</phone>
            <email>bairong.shen@scu.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2899-1531</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Institutes for System Genetics</institution>
        <institution>West China Hospital</institution>
        <addr-line>Chengdu</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>iHealthd Shanghai Inc</institution>
        <addr-line>Shanghai</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Guilin Medical University Affiliated Hospital</institution>
        <addr-line>Guilin</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Guilin Medical University</institution>
        <addr-line>Guilin</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Bairong Shen <email>bairong.shen@scu.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>25</day>
        <month>11</month>
        <year>2022</year>
      </pub-date>
      <volume>24</volume>
      <issue>11</issue>
      <elocation-id>e40361</elocation-id>
      <history>
        <date date-type="received">
          <day>21</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>30</day>
          <month>8</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>9</day>
          <month>9</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>25</day>
          <month>10</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Anjun Chen, Ran Huang, Erman Wu, Ruobing Han, Jian Wen, Qinghua Li, Zhiyong Zhang, Bairong Shen. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 25.11.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2022/11/e40361" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Electronic medical records (EMRs) of patients with lung cancer (LC) capture a variety of health factors. Understanding the distribution of these factors will help identify key factors for risk prediction in preventive screening for LC.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to generate an integrated biomedical graph from EMR data and Unified Medical Language System (UMLS) ontology for LC, and to generate an LC health factor distribution from a hospital EMR of approximately 1 million patients.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The data were collected from 2 sets of 1397 patients with and those without LC. A patient-centered health factor graph was plotted with 108,000 standardized data, and a graph database was generated to integrate the graphs of patient health factors and the UMLS ontology. With the patient graph, we calculated the connection delta ratio (CDR) for each of the health factors to measure the relative strength of the factor’s relationship to LC.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The patient graph had 93,000 relations between the 2794 patient nodes and 650 factor nodes. An LC graph with 187 related biomedical concepts and 188 horizontal biomedical relations was plotted and linked to the patient graph. Searching the integrated biomedical graph with any number or category of health factors resulted in graphical representations of relationships between patients and factors, while searches using any patient presented the patient’s health factors from the EMR and the LC knowledge graph (KG) from the UMLS in the same graph. Sorting the health factors by CDR in descending order generated a distribution of health factors for LC. The top 70 CDR-ranked factors of disease, symptom, medical history, observation, and laboratory test categories were verified to be concordant with those found in the literature.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>By collecting standardized data of thousands of patients with and those without LC from the EMR, it was possible to generate a hospital-wide patient-centered health factor graph for graph search and presentation. The patient graph could be integrated with the UMLS KG for LC and thus enable hospitals to bring continuously updated international standard biomedical KGs from the UMLS for clinical use in hospitals. CDR analysis of the graph of patients with LC generated a CDR-sorted distribution of health factors, in which the top CDR-ranked health factors were concordant with the literature. The resulting distribution of LC health factors can be used to help personalize risk evaluation and preventive screening recommendations.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>lung cancer</kwd>
        <kwd>risk factor</kwd>
        <kwd>patient graph</kwd>
        <kwd>UMLS knowledge graph</kwd>
        <kwd>Unified Medical Language System</kwd>
        <kwd>connection delta ratio</kwd>
        <kwd>EMR</kwd>
        <kwd>electronic medical record</kwd>
        <kwd>EHR</kwd>
        <kwd>electronic health record</kwd>
        <kwd>cancer</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Early lung cancer (LC) detection is a key strategy to combat this deadly disease worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. The National Lung Screening Trial in the United States and similar clinical trials around the world have shown an approximately 20% reduction in mortality from LC as a result of screening with low-dose computed tomography [<xref ref-type="bibr" rid="ref2">2</xref>]. Based on these studies, LC screening medical guidelines as well as statistical risk prediction models including PLCO<sub>M2012</sub> have been implemented to recommend screening for smokers [<xref ref-type="bibr" rid="ref3">3</xref>]. However, screening is not commonly recommended for nonsmokers even though they represent a significant percentage of patients with LC worldwide, 15%-20% among male patients and over 50% among female patients [<xref ref-type="bibr" rid="ref4">4</xref>]. In addition, adoption of LC screening is still very low. For example, only approximately 5% of the at-risk population received their annual screening in the United States [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      <p>Risk-based or personalized screening approaches are being studied to overcome these challenges [<xref ref-type="bibr" rid="ref6">6</xref>]. We believe that a deeper understanding of the spectrum of risk factors for LC and applying technologies such as machine learning and knowledge graphs (KGs) will generate more cost-effective screening solutions.</p>
      <p>KGs have been widely applied in biomedical research. For interpreting proteomics data, a large-scale clinical KG has been plotted from biomedical data using the Neo4j tool [<xref ref-type="bibr" rid="ref7">7</xref>]. Open-source graph databases and tools including Neo4j have made it easier to build and analyze KGs [<xref ref-type="bibr" rid="ref8">8</xref>]. Studies have also demonstrated that construction of high-quality patient KGs from electronic medical records (EMRs) using rudimentary concept extraction is feasible and that the KGs can be used to predict diagnosis on the basis of symptoms [<xref ref-type="bibr" rid="ref9">9</xref>]. Even though graphical representation of patient data holds the promise to illuminate insights in health care and to transform such insights gleaned from EMR data into actionable knowledge, the application of EMR-wide graphs for studying individual disease diagnosis journeys or treatment processes is still limited [<xref ref-type="bibr" rid="ref10">10</xref>]. A graphical data model has been constructed, integrating clinical and molecular data of patients with non–small cell LC in the Cancer Genome Atlas LC data sets [<xref ref-type="bibr" rid="ref11">11</xref>]. Another recent study of synthetic patients proposed a new graphical method to identify any particular disease’s potential risk factor distribution from EMR (personal communication by A Chen, March 1, 2022).</p>
      <p>The Unified Medical Language System (UMLS) ontology, freely available from the National Library of Medicine, is a KG consisting of millions of nodes and relationships [<xref ref-type="bibr" rid="ref12">12</xref>]. It forms the foundation of interoperable biomedical information systems and services, including electronic health records. Connecting the UMLS KG to patient graphs may enable semantic search of patient data and support clinical decision-making [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
      <p>This study aimed to construct a patient health factor graph for LC from a hospital EMR and integrate it with the UMLS KG for graph search and risk factor analysis. Through graph search, the study also aimed to generate a distribution of LC health factors, which was expected to help implement personalized LC risk evaluation for preventive screening.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>EMR Health Factor Data Collection</title>
        <p>We deidentified the patient records from January 2018 to June 2021 and saved them on a secured data server controlled by the hospital’s informatics department. The data set had approximately 1 million patients and 7 million encounters including both outpatients and inpatients, in which patient names, dates of birth, contacts, and addresses were removed. The original identifiers of patients and encounters were replaced by irrelevant random numbers. Before using the data, our research team members were trained in the hospital’s patient data security and privacy policy.</p>
        <p>Because the EMR data had no usable codes associated with the diagnoses, synonyms of LC in Chinese were used to search for patients with LC. A total of 1397 patients with LC aged ≥30 years were included in the target data set. The same number (n=1397) of patients without LC and aged ≥30 years were randomly selected as control (or background) patients for comparison purposes.</p>
        <p>Deidentified records of outpatient and inpatient visits, diagnoses, laboratory tests, and procedures were imported into a custom data collection tool on the secured data server. The data tool automatically extracted laboratory test data and saved them in the database. Researchers manually selected data from text records and entered them into the database. Because the records were not coded, practical rules were developed to improve consistency in the data collection process. Synonyms were automatically converted to “local standard terms” and the resulting data were called “local standard data.” For each patient, only data from before the final diagnosis of LC were collected for studying disease risk factors, and a patient diagnosis journey (PDJ) object was created in the data tool to contain 1 or multiple encounters leading to the final diagnosis. When exporting PDJ data to a CSV file for analysis, only the latest data for each health factor in PDJ were selected. The final raw data set contained nearly 50,000 data from patients with LC and over 58,000 data from background patients. There were over 3000 different health factors identified in these data.</p>
      </sec>
      <sec>
        <title>Patient Graph Construction</title>
        <p>To simplify the patient graph, continuous numerical data were converted to categorical data. For example, values of age were converted to categories (ranges), including 30-50, 50-70, and &#62;70 years; the value of drinking was “true” if the patient consumed &#62;1 drink per day; the value of smoking was “true” if the patient smoked &#62;1 cigarette per day. Laboratory findings from the EMR were already recorded as categorical variables: normal or abnormal; true or false; positive or negative; high, medium, or low; and up, down, or normal. After value conversion, approximately 93,000 standard data for about 550 factors (ie, codes) that appeared in at least 10 patients with LC were selected and saved into a factor import CSV file. The format of the factor import file was as follows: virtual-id, category, code, term, value, unit, converted-value, and date. Patients with LC and background patients (N=2794) were both saved in a patient import file, one patient per line, with the following format: virtual-id, LC-label (1 for LC, 0 for background), and factor-count.</p>
        <p>We used the Neo4j Desktop tool (version 4.4) available freely from Neo4j Inc, which is a graph database with a graphical user interface (Neo4j Browser) to query with Cypher language and view graphs. It provides an application programming interface through a Python driver. It can load data from CSV files to construct graphs. In our patient-centered graph model, each patient was represented by a “Patient” node (total of 2794 patient nodes), while health factor and value pairs were represented by 650 factor nodes. Because all values were categorical and some health factors had more than 1 piece of categorical data, the number of factor-value pair nodes increased from 550 to 650. The health factors were further subdivided into the following categories: Condition, Symptom, Observation, History, RiskFactor, Labtest, Procedure, Medication, and Treatment. The graph drew over 93,000 connections from patients to factors. Constraints were created on each label to ensure uniqueness. Patient nodes required virtual-id while all factor nodes required category, code, and converted-value as node key.</p>
      </sec>
      <sec>
        <title>UMLS Disease Subgraph Construction</title>
        <p>The UMLS 2020AB release was downloaded from the National Library of Medicine’s UMLS website and installed locally by following the provided instructions. The local UMLS ontology had 2.8 million concepts, 8.3 million terms, and 39.1 million relationships. For generating an LC UMLS subgraph, we directly used the concept file MRCONSO.RRF and relation file MRREL.RRF in rich release format to generate Neo4j graph import files. The LC codes were first expanded to a more complete set of LC codes using the UMLS hierarchy (<xref ref-type="table" rid="table1">Table 1</xref>). We then used the expanded concept unique identifiers to find all horizontal relations (approximately 1100) between these LC target concepts and other biomedical concepts from over 39 million relations in UMLS ontology. The relations discovered were filtered by a selected set of UMLS relationship attributes for biological or medical concepts (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>); these were categorized into either biological concept relationships (called “biorel”) or medical concept relationships (called “medrel”). To visualize this simple categorization of biomedical knowledge, we added RelCat nodes between TargetConcept nodes and related Concept nodes in the UMLS subgraph as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. We then introduced a single AbstractPatient node to connect with all LC TargetConcept nodes. Connecting the patient nodes in EMR graph to the single AbstractPatient node resulted in an integrated biomedical graph that can present any patient’s health factors together with biomedical knowledge from UMLS ontology for LC.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Expanded lung cancer concepts in the Unified Medical Language System (UMLS) hierarchy.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="430"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td>UMLS concept unique identifiers</td>
                <td>Term</td>
                <td>SNOMEDCT code</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>C0581834</td>
                <td>Suspected lung cancer</td>
                <td>162573006</td>
              </tr>
              <tr valign="top">
                <td>C0242379</td>
                <td>Malignant neoplasm of lung</td>
                <td>363358000</td>
              </tr>
              <tr valign="top">
                <td>C0149925</td>
                <td>Small cell carcinoma of lung</td>
                <td>254632001</td>
              </tr>
              <tr valign="top">
                <td>C0007131</td>
                <td>Non-Small Cell Lung Carcinoma</td>
                <td>254637007</td>
              </tr>
              <tr valign="top">
                <td>C0152013</td>
                <td>Adenocarcinoma of lung (disorder)</td>
                <td>254626006</td>
              </tr>
              <tr valign="top">
                <td>C0149782</td>
                <td>Squamous cell carcinoma of lung</td>
                <td>254634000</td>
              </tr>
              <tr valign="top">
                <td>C1306460</td>
                <td>Primary malignant neoplasm of lung</td>
                <td>93880001</td>
              </tr>
              <tr valign="top">
                <td>C0153676</td>
                <td>Secondary malignant neoplasm of lung</td>
                <td>94391008</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <boxed-text id="box1" position="float">
          <title>List of Unified Medical Language System (UMLS) relationship attributes and categories.</title>
          <p>
            <bold>Biological concept relationships:</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>gene_associated_with_disease</p>
            </list-item>
            <list-item>
              <p>gene_involved_in_pathogenesis_of_disease</p>
            </list-item>
            <list-item>
              <p>gene_mapped_to_disease</p>
            </list-item>
            <list-item>
              <p>gene_product_malfunction_associated_with_disease</p>
            </list-item>
            <list-item>
              <p>gene_product_is_biomarker_of</p>
            </list-item>
            <list-item>
              <p>may_be_cytogenetic_abnormality_of_disease</p>
            </list-item>
            <list-item>
              <p>may_be_molecular_abnormality_of_disease</p>
            </list-item>
          </list>
          <p>
            <bold>Medical concept relationships:</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>may_treat</p>
            </list-item>
            <list-item>
              <p>regimen_has_accepted_use_for_disease</p>
            </list-item>
            <list-item>
              <p>has_associated_finding</p>
            </list-item>
            <list-item>
              <p>associated_finding_of</p>
            </list-item>
            <list-item>
              <p>associated_disease</p>
            </list-item>
            <list-item>
              <p>is_finding_of_disease</p>
            </list-item>
            <list-item>
              <p>related_to</p>
            </list-item>
            <list-item>
              <p>clinically_associated_with</p>
            </list-item>
            <list-item>
              <p>co-occurs_with</p>
            </list-item>
            <list-item>
              <p>may_be_associated_disease_of_disease</p>
            </list-item>
            <list-item>
              <p>may_be_finding_of_disease</p>
            </list-item>
          </list>
        </boxed-text>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Biomedical graph model for the integration of the electronic medical record patient graph with the Unified Medical Language System knowledge graph of lung cancer. Numbered relationship labels are listed in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
          </caption>
          <graphic xlink:href="jmir_v24i11e40361_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Node and relationship labels in the integrated biomedical graph model (shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="220"/>
            <col width="320"/>
            <col width="310"/>
            <thead>
              <tr valign="top">
                <td>Number</td>
                <td>From node label</td>
                <td>Relationship labels</td>
                <td>To node label</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Patient</td>
                <td>HAS_CONDITION</td>
                <td>Condition</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Patient</td>
                <td>HAS_SYMPTOM</td>
                <td>Symptom</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Patient</td>
                <td>HAS_PROCEDURE</td>
                <td>Procedure</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Patient</td>
                <td>HAS_MEDICATION</td>
                <td>Medication</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Patient</td>
                <td>HAS_TREATMENT</td>
                <td>Treatment</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Patient</td>
                <td>HAS_OBSERVATION</td>
                <td>Observation</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Patient</td>
                <td>HAS_RISKFACTOR</td>
                <td>RiskFactor</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Patient</td>
                <td>HAS_HISTORY</td>
                <td>History</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Patient</td>
                <td>HAS_LABTEST</td>
                <td>Labtest</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Patient</td>
                <td>INSTANCE_OF</td>
                <td>AbstractPatient</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>AbstractPatient</td>
                <td>MAY_HAVE_TARGET</td>
                <td>TargetConcept</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>TargetConcept</td>
                <td>HAS_RELCAT</td>
                <td>RelCat</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>RelCat</td>
                <td>HAS_RELA</td>
                <td>Concept</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Patient Health Factor Distribution</title>
        <p>We developed a Python script to automatically query the patient graph with each of the health factors. The number of connections from each factor to LC target patients (depicted as “TPC” in equation 1) and background patients (depicted as “BPC” in equation 1) in the search results were counted separately. For each factor, the delta of patient connection counts was calculated by subtracting the number of background patient connections from that of the target patient connections. Division of the delta by the total number of patient connections yielded the “connection delta ratio” (CDR), a relative measure of the strength of connections from a factor to the target patient. Sorting factors by CDR and plotting a graph of CDR versus the sorted factors yielded a distribution of LC health factors from high to low strength.</p>
        <p>
          <disp-formula>CDR = (TPC – BPC) / (TPC + BPC) <bold>(1)</bold></disp-formula>
        </p>
        <p>A CDR between 0 and 1 implied that the factor was more related to the target patient, 1 being most related. A CDR below 0 implied that the factor was more related to the background patient.</p>
        <p>In this study, factors with a CDR of &#62;0.5 and having connections with at least 10 patients with LC were selected for literature verification. The local standard terms were first translated to English and mapped to the corresponding UMLS concepts as well as standard codes from SNOMEDCT_US, LOINC, or RxNORM if possible. We then searched the research literature on Google, Google Scholar, and PubMed for each health factor and reviewed the published studies to verify whether the health factors were confirmed risk factors, were correlated with LC, were unrelated to LC, or had an unsure relationship with LC. If a factor’s relationship with LC was inconclusive in existing research reports, the factor was tagged as “unsure.” For example, to look up the factor “Hypocalcemia,” search terms included “Lung cancer risk factor Hypocalcemia” and other variations if necessary.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This retrospective study of EMR patient data has been approved by the institutional review board of Guilin Medical University Associated Hospital in China (QTLL202139).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Integrated Graph Model of the EMR Patient Graph and the UMLS KG</title>
        <p>To study the spectrum of health factors related to LC in the hospital EMR, we applied a new graph method that we recently developed using synthetic patient data. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the graph model integrating the EMR patient graph and UMLS knowledge subgraph for LC. The patient graph is patient-centered with patient nodes connecting to different categories of health factor nodes. <xref ref-type="table" rid="table2">Table 2</xref> lists the relationships between nodes, as generated in the graph database. The UMLS subgraph in this model is focused on the horizontal biomedical relationships between LC nodes and related concept nodes. Such an integration model enables the presentation of a patient’s actual health factors together with the UMLS KG’s related biomedical factors in the same graph.</p>
      </sec>
      <sec>
        <title>Patient Health Factor Graph Based on EMR Data</title>
        <p>From the hospital EMR, 1397 patients with LC were selected along with the same number of background patients without LC. After deidentified data of laboratory tests and procedures were integrated into the corresponding encounters, a total of 108,000 standard data for various categories of health factors were extracted from patient encounters. Although over 3000 different factors were collected, only approximately 550 factors shared by at least 10 patients with LC were used for building the patient health factor graph.</p>
        <p>The patient health factor graph was constructed by importing patient properties for the patient nodes and factor properties for the corresponding health factor nodes. The resulting patient graph had 93,000 relations between the 2794 patient nodes and 650 factor-value pair nodes. <xref ref-type="table" rid="table3">Table 3</xref> lists several examples of Cypher queries for searching patients with various factors. For example, clinicians can easily search for patients with LC with 1 or more co-occurring diseases (<xref rid="figure2" ref-type="fig">Figure 2</xref>), with 1 or more nonlaboratory factors (symptoms, medical histories, and observations; <xref rid="figure3" ref-type="fig">Figure 3</xref>), or laboratory tests (<xref rid="figure4" ref-type="fig">Figure 4</xref>). One can also easily search for any number of health factors shared among patients with LC.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Examples of graph search tasks and queries using Cypher language.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="70"/>
            <col width="460"/>
            <col width="470"/>
            <thead>
              <tr valign="top">
                <td>Number</td>
                <td>Graph search task</td>
                <td>Cypher query<sup>a,b</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Search for patients with LC with 1-6 co-occurring diseases and present the topology.</p>
                    </list-item>
                    <list-item>
                      <p>C-389764: Hypocalcemia</p>
                    </list-item>
                    <list-item>
                      <p>C-172569: Bacterial Infection</p>
                    </list-item>
                    <list-item>
                      <p>C-765209: Obstructive pneumonia</p>
                    </list-item>
                    <list-item>
                      <p>C-305976: Pneumothorax</p>
                    </list-item>
                    <list-item>
                      <p>C-352894: Leukopenia</p>
                    </list-item>
                    <list-item>
                      <p>C-654730: Pneumonia</p>
                    </list-item>
                  </list>
                </td>
                <td>match (p:Patient {label:'1'})--&#62;(f {cat: 'dac'})<break/>where f.code = 'C-389764'  <break/>  
            or f.code = 'C-172569'  <break/>  
            or f.code = 'C-765209'  <break/>  
            or f.code = 'C-305976'  <break/>  
            or f.code = 'C-352894'  <break/>  
            or f.code = 'C-654730'<break/>return p, f;</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Search for patients with LC with 1-5 nonlaboratory factors and present the topology</p>
                    </list-item>
                    <list-item>
                      <p>C-549780: Pain</p>
                    </list-item>
                    <list-item>
                      <p>C-289547: Bloodstained sputum</p>
                    </list-item>
                    <list-item>
                      <p>C-127089: Hoarseness</p>
                    </list-item>
                    <list-item>
                      <p>C-029761: Productive Cough</p>
                    </list-item>
                    <list-item>
                      <p>C-294680: Swollen Lymph Node in head and neck</p>
                    </list-item>
                  </list>
                </td>
                <td>match (p:Patient {label:'1'})--&#62;(f) where  <break/>  
            (f.code = 'C-549780' and f.valcvt = 'true')<break/>or (f.code = 'C-289547' and f.valcvt='true')<break/>or (f.code = 'C-127089' and f.valcvt='true')<break/>or (f.code = 'C-029761' and f.valcvt='true')<break/>or (f.code = 'C-294680' and f.valcvt='true')<break/>return p, f;</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Search for patients with LC with 1-5 laboratory test values and present the topology.</p>
                    </list-item>
                    <list-item>
                      <p>C-659218: Hepatitis B virus</p>
                    </list-item>
                    <list-item>
                      <p>C-493765: Squamous cell carcinoma antigen</p>
                    </list-item>
                    <list-item>
                      <p>C-573086: Neuron-specific enolase measurement</p>
                    </list-item>
                    <list-item>
                      <p>C-120948: Gastrin-releasing peptide precursor increased</p>
                    </list-item>
                    <list-item>
                      <p>C-814793: Mycoplasma pneumoniae antibody</p>
                    </list-item>
                  </list>
                </td>
                <td>match (p:Patient {label:'1'})--&#62;(f {cat: 'lab'}) where<break/>(f.code = 'C-659218' and f.valcvt = 'true')<break/>or (f.code = 'C-493765' and f.valcvt = 'up')<break/>or (f.code = 'C-573086' and f.valcvt = 'up')<break/>or (f.code = 'C-120948' and f.valcvt = 'abnormal')<break/>or (f.code = 'C-814793' and f.valcvt = 'abnormal')<break/>return p, f;</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Search for 1 patient, show the electronic medical record health factor graph and the Unified Medical Language System knowledge graph together</p>
                    </list-item>
                  </list>
                </td>
                <td>match (p:Patient {label:'1', vpid:'_8908085766'})--&#62;(f)<break/>match (p)--&#62;(ap:AbstractPatient)--&#62;(tc:TargetConcept)--&#62;(cr:RelCat)--&#62;(c:Concept)<break/>return p, f, ap, tc, cr, c;</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Using Neo4j Cypher query language.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Patient with LC: label=1; background patient: label=0. Factor property f.code: unique local code. Factor property f.valcvt: converted value.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Topology of an example patient graph searched with 6 disease factors. Search query 1 in <xref ref-type="table" rid="table3">Table 3</xref> was used. Patient nodes are shown in blue and factor nodes are shown in red. Lines represent relationships between a patient and factors.</p>
          </caption>
          <graphic xlink:href="jmir_v24i11e40361_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Topology of an example patient graph searched with 5 nonlaboratory factors. Search query 2 in <xref ref-type="table" rid="table3">Table 3</xref> was used. Patient nodes are shown in blue and factor nodes are shown in pink. Lines represent relationships between a patient and factors.</p>
          </caption>
          <graphic xlink:href="jmir_v24i11e40361_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Topology of an example patient graph searched with 5 laboratory factors. Search query 3 in <xref ref-type="table" rid="table3">Table 3</xref> was used. Patient nodes are shown in blue and factor nodes are shown in orange. Lines represent relationships between a patient and factors.</p>
          </caption>
          <graphic xlink:href="jmir_v24i11e40361_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Integration of the EMR Patient Graph With the UMLS Subgraph</title>
        <p>As the largest integrated biomedical ontology, the UMLS graph contains hierarchies of diseases and horizontal relationships with other entities. Within a disease family such as LC, the various types of LCs are horizontally connected to a myriad of related biomedical concepts including genes, proteins, symptoms, observations, medication, and treatments. This study is focused on the UMLS knowledge subgraph containing horizontal relationships for LC. Using the UMLS LC hierarchy, the target LC codes found in EMRs were expanded to 8 main LC concepts (<xref ref-type="table" rid="table1">Table 1</xref>). From these concepts, approximately 1100 relations were identified in the UMLS ontology. Most of the relations were hierarchical—for example, a parent-child relationship—and thus the relations were further filtered by the biomedical relationships that we were interested in (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). The resulting UMLS LC biomedical subgraph had 8 LC concept nodes, 187 related biomedical concepts, and 188 horizontal biomedical relations (<xref rid="figure5" ref-type="fig">Figure 5</xref>).</p>
        <p>Through a single AbstractPatient node, the EMR patient graph was connected to the UMLS subgraph for LC. Search query 4 in <xref ref-type="table" rid="table3">Table 3</xref> and its search result in <xref rid="figure5" ref-type="fig">Figure 5</xref> show an example presentation of both actual patient’s health factors in the EMR and relevant biomedical knowledge in the UMLS in the same graph.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Example search result of the integrated biomedical graph. Search query 4 in <xref ref-type="table" rid="table3">Table 3</xref> was used to search 1 specific ID of a patient with lung cancer. Left side: health factors from the electronic medical record of one patient with lung cancer. Right side: lung cancer biomedical knowledge from the Unified Medical Language System. Middle: single AbstractPatient as the connection. BioRel: biological concept relationship; MedRel: medical concept relationship.</p>
          </caption>
          <graphic xlink:href="jmir_v24i11e40361_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Generation of the Distribution of LC Health Factors From the EMR</title>
        <p>With the patient health factor graph, we searched for patients with LC and background patients with each of the health factors and its value. The connection delta ratios were calculated for each factor from the number of connections to patients with LC and the number of connections to background patients. Sorting factors by CDR in descending order generated a distribution of health factors for LC found in the EMR. The complete distribution of top-ranked factors over a CDR cutoff of 0.5 is shown in Table A1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and plotted in <xref rid="figure6" ref-type="fig">Figure 6</xref>. As examples, up to 5 top health factors in each category are shown in <xref ref-type="table" rid="table4">Table 4</xref>. For understanding LC risk factors, this distribution excluded the various cancers, all procedures and medications related to cancers, and treatments.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Distribution curve of lung cancer health factors sorted by the connection delta ratio (CDR; cutoff=0.5). Only partial codes are visible on the x-axis. The full spectrum of lung cancer health factors can be found in Table A1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </caption>
          <graphic xlink:href="jmir_v24i11e40361_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Partial distribution of lung cancer health factors sorted by category and connection delta ratio (cutoff=0.5) as examples. The full distribution of lung cancer health factors is provided in Table A1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="110"/>
            <col width="130"/>
            <col width="360"/>
            <col width="100"/>
            <col width="170"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Category<sup>a</sup></td>
                <td>Local code</td>
                <td>Term</td>
                <td>Value</td>
                <td>Connection delta ratio</td>
                <td>Tag</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>dac</td>
                <td>C-182460</td>
                <td>Left lung pulmonary obstructive pneumonia</td>
                <td>TRUE</td>
                <td>1.00</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>dac</td>
                <td>C-248056</td>
                <td>Right lung pulmonary obstructive pneumonia</td>
                <td>TRUE</td>
                <td>1.00</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>dac</td>
                <td>C-765209</td>
                <td>Obstructive pneumonia</td>
                <td>TRUE</td>
                <td>1.00</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>dac</td>
                <td>C-305976</td>
                <td>Pneumothorax</td>
                <td>TRUE</td>
                <td>0.93</td>
                <td>correlated</td>
              </tr>
              <tr valign="top">
                <td>dac</td>
                <td>C-172569</td>
                <td>Bacterial Infection</td>
                <td>TRUE</td>
                <td>0.88</td>
                <td>correlated</td>
              </tr>
              <tr valign="top">
                <td>lab</td>
                <td>C-659218</td>
                <td>Hepatitis B virus</td>
                <td>TRUE</td>
                <td>1.00</td>
                <td>correlated</td>
              </tr>
              <tr valign="top">
                <td>lab</td>
                <td>C-493765</td>
                <td>Squamous cell carcinoma antigen</td>
                <td>up</td>
                <td>0.90</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>lab</td>
                <td>C-573086</td>
                <td>Neuron-specific enolase measurement</td>
                <td>up</td>
                <td>0.82</td>
                <td>correlated</td>
              </tr>
              <tr valign="top">
                <td>lab</td>
                <td>C-952408</td>
                <td>Non-small cell lung cancer associated-antigen</td>
                <td>up</td>
                <td>0.82</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>lab</td>
                <td>C-103698</td>
                <td>Superoxide dismutase measurement</td>
                <td>down</td>
                <td>0.82</td>
                <td>correlated</td>
              </tr>
              <tr valign="top">
                <td>obs</td>
                <td>C-039824</td>
                <td>Mediastinal mass</td>
                <td>TRUE</td>
                <td>1.00</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>obs</td>
                <td>C-706432</td>
                <td>Lung mass</td>
                <td>TRUE</td>
                <td>1.00</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>obs</td>
                <td>C-748932</td>
                <td>Lung mass found in checkup</td>
                <td>TRUE</td>
                <td>1.00</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>obs</td>
                <td>C-134276</td>
                <td>Lung shadow</td>
                <td>TRUE</td>
                <td>0.91</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>obs</td>
                <td>C-706281</td>
                <td>Bronchial stenosis</td>
                <td>TRUE</td>
                <td>0.89</td>
                <td>correlated</td>
              </tr>
              <tr valign="top">
                <td>rf</td>
                <td>C-902187</td>
                <td>Smoking</td>
                <td>TRUE</td>
                <td>0.50</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>smp</td>
                <td>C-549780</td>
                <td>Pain</td>
                <td>TRUE</td>
                <td>1.00</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>smp</td>
                <td>C-289547</td>
                <td>Bloodstained sputum</td>
                <td>TRUE</td>
                <td>0.96</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>smp</td>
                <td>C-152064</td>
                <td>Hemoptysis (cough up blood)</td>
                <td>TRUE</td>
                <td>0.83</td>
                <td>correlated</td>
              </tr>
              <tr valign="top">
                <td>smp</td>
                <td>C-243071</td>
                <td>Shoulder Pain</td>
                <td>TRUE</td>
                <td>0.82</td>
                <td>confirmed</td>
              </tr>
              <tr valign="top">
                <td>smp</td>
                <td>C-127089</td>
                <td>Hoarseness</td>
                <td>TRUE</td>
                <td>0.80</td>
                <td>correlated</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Categories include condition (dac), laboratory test (lab), observation (obs), risk factor (rf), and symptom (smp).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We checked the medical literature for any associations between these top CDR-ranked health factors and LC [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. This literature review confirmed that 70 out of the 71 factors (Table A1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) were LC risk factors or were correlated with LC. The relationship between 1 factor, laboratory test for immunoglobulin E levels, and LC was unsure according to the literature [<xref ref-type="bibr" rid="ref27">27</xref>]. This high degree of concordance between the results of our CDR analysis and the literature suggests that the patient graph CDR method was effective in generating a reliable distribution of LC health factors from EMR patient data.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>Using hospital EMR patient data and applying the new patient graph CDR method recently developed from synthetic patient data, this study was able to construct an integrated biomedical graph for LC. From searching the graph, the study created a distribution of health factors for LC, which were verified through literature review. Our results show that the new strategy of first using synthetic patients for method development and then applying the methods with real patient data is valid and effective.</p>
      <p>This study has implications for hospitals with regard to harnessing KG databases and technologies. First, generating an integrated biomedical graph with hospital EMR data may enable medical professionals to view individual patient’s health factor graphs along with the related UMLS KGs for comprehensive comparisons. Current medical concept nodes horizontally related to the LC nodes are mostly genes and gene-related biological information, as well as drugs and treatment-related information from the UMLS ontology (see <xref rid="figure5" ref-type="fig">Figure 5</xref>). Since the UMLS is updated quarterly, the LC integrated biomedical graph will grow as the UMLS grows. Thus, this KG integration offers a new way for hospitals to bring continuously updated international standard biomedical knowledge to patient care. The current graph model is designed specifically for searching risk factors; however, it can be modified for other clinical information tasks. It may also be integrated with cancer-associated lifestyle KGs for disease management information [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      <p>The second implication of this study may be applying the CDR-ranked distribution of health factors to build more effective or practical machine learning models for LC risk prediction. Because the distribution ranks factors from higher to lower relative strength, it may be used to help select more health factors to build prediction models; that is, feature engineering. For example, we have an ongoing project experimenting with the factor distribution in building LC risk prediction machine learning models. Knowing the risk factors actually found in the EMR data, we could focus on these risk factors and reduce the variables from over 100 to fewer than 30 in the machine learning models that were generated from EMR-wide data. To increase the LC screening rate in larger populations, machine learning models with a small number of variables for which data can be readily available in community and rural clinics are necessary.</p>
      <p>In addition, the patient health factor graphs generated from EMR data may enable hospitals to study the effect of various types of factors in diagnosis, medication, treatment, and disease management. Such graph analysis complements existing statistical analysis. Traditionally, studies on individual risk factors are hypothesis driven and use a clinical trial or case-control study design [<xref ref-type="bibr" rid="ref29">29</xref>]. The literature found in this study for verification of the health factor distribution collectively indicates the use of this approach [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. Because this study’s patient graph method is EMR data driven, it can reveal potential new risk factors or inconclusive risk factors that deserve additional research. For example, the factor “laboratory test for immunoglobulin E levels” was tagged as “unsure” in the distribution because prior studies were inconclusive. Our CDR analysis suggests that this immunoglobulin E factor requires further clinical validation [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      <p>Because EMR data sometimes have biases and missing data, the EMR data–driven patient graph CDR method has limitations. CDR is a simple measurement of a factor’s relative strength, but caution should be taken when considering factors with a high CDR but a small number of connections. The higher the number of connections, the more reliable the CDR. Hence, studies should set a cutoff for the CDR as well as the minimal number of connections to ensure that the study uses enough data. It is also important to recognize factors that might be affected by data biases and to exclude them from CDR analysis [<xref ref-type="bibr" rid="ref31">31</xref>]. For EMRs lacking standardized and structured data, collecting standardized data is crucial but challenging. If a data collection pipeline is not fully automated, collecting enough unbiased standardized patient profile data will be a very time-consuming process.</p>
      <p>In conclusion, by collecting standardized data of thousands of patients with and those without LC from EMRs, it was possible to generate a hospital-wide patient-centered health factor graph for graph search and presentation. It was also practical to integrate the patient graph with the UMLS KG for LC, enabling hospitals to bring continuously updated international standard biomedical KGs from the UMLS to clinical care. Applying CDR analysis to the graph of patients with LC yielded a CDR-sorted distribution of health factors, where top CDR-ranked health factors showed a high degree of concordance with the literature. The resulting distribution of LC health factors can be used to help personalize risk evaluation and preventive screening recommendations.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Complete lung cancer health factor distribution sorted by category (Cat) and connection delta ratio (CDR).</p>
        <media xlink:href="jmir_v24i11e40361_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 159 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CDR</term>
          <def>
            <p>connection delta ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">KG</term>
          <def>
            <p>knowledge graph</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LC</term>
          <def>
            <p>lung cancer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PDJ</term>
          <def>
            <p>patient diagnosis journey</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the Sichuan Science and Technology Program (grant 2020YFQ0019) and the Guilin Municipal Science and Technology Bureau, China (grant 20190219-2).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>AC designed the study and drafted the manuscript. R Huang wrote programs and analyzed data. EW and R Han collected the data. JW supervised the study. ZZ, QL, and BS proposed the study, obtained funding, and directed the study.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pinsky</surname>
              <given-names>PF</given-names>
            </name>
          </person-group>
          <article-title>Lung cancer screening with low-dose CT: a world-wide view</article-title>
          <source>Transl Lung Cancer Res</source>
          <year>2018</year>
          <month>06</month>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>234</fpage>
          <lpage>242</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.21037/tlcr.2018.05.12"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/tlcr.2018.05.12</pub-id>
          <pub-id pub-id-type="medline">30050762</pub-id>
          <pub-id pub-id-type="pii">tlcr-07-03-234</pub-id>
          <pub-id pub-id-type="pmcid">PMC6037972</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>National Lung Screening Trial Research Team</collab>
            <name name-style="western">
              <surname>Aberle</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Clapp</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Fagerstrom</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Gareen</surname>
              <given-names>IF</given-names>
            </name>
            <name name-style="western">
              <surname>Gatsonis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Sicks</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Reduced lung-cancer mortality with low-dose computed tomographic screening</article-title>
          <source>N Engl J Med</source>
          <year>2011</year>
          <month>08</month>
          <day>04</day>
          <volume>365</volume>
          <issue>5</issue>
          <fpage>395</fpage>
          <lpage>409</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21714641"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1102873</pub-id>
          <pub-id pub-id-type="medline">21714641</pub-id>
          <pub-id pub-id-type="pmcid">PMC4356534</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tammemägi</surname>
              <given-names>Martin C</given-names>
            </name>
            <name name-style="western">
              <surname>Katki</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Hocking</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Church</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Caporaso</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kvale</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Chaturvedi</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Silvestri</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Commins</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Selection criteria for lung-cancer screening</article-title>
          <source>N Engl J Med</source>
          <year>2013</year>
          <month>02</month>
          <day>21</day>
          <volume>368</volume>
          <issue>8</issue>
          <fpage>728</fpage>
          <lpage>736</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23425165"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1211776</pub-id>
          <pub-id pub-id-type="medline">23425165</pub-id>
          <pub-id pub-id-type="pmcid">PMC3929969</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dubin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Lung Cancer in Non-Smokers</article-title>
          <source>Mo Med</source>
          <year>2020</year>
          <volume>117</volume>
          <issue>4</issue>
          <fpage>375</fpage>
          <lpage>379</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32848276"/>
          </comment>
          <pub-id pub-id-type="medline">32848276</pub-id>
          <pub-id pub-id-type="pii">ms117_p0375</pub-id>
          <pub-id pub-id-type="pmcid">PMC7431055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <article-title>Lung Cancer Screening</article-title>
          <source>National Cancer Institute. Cancer Trends Progress Report</source>
          <access-date>2022-11-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://progressreport.cancer.gov/detection/lung_cancer">https://progressreport.cancer.gov/detection/lung_cancer</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Challenges and research opportunities for lung cancer screening in China</article-title>
          <source>Cancer Commun (Lond)</source>
          <year>2018</year>
          <month>06</month>
          <day>07</day>
          <volume>38</volume>
          <issue>1</issue>
          <fpage>34</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancercommun.biomedcentral.com/articles/10.1186/s40880-018-0305-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s40880-018-0305-0</pub-id>
          <pub-id pub-id-type="medline">29880036</pub-id>
          <pub-id pub-id-type="pii">10.1186/s40880-018-0305-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC5992836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Santos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Colaço</surname>
              <given-names>Ana R</given-names>
            </name>
            <name name-style="western">
              <surname>Nielsen</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Strauss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Geyer</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Coscia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Albrechtsen</surname>
              <given-names>NJW</given-names>
            </name>
            <name name-style="western">
              <surname>Mundt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A knowledge graph to interpret clinical proteomics data</article-title>
          <source>Nat Biotechnol</source>
          <year>2022</year>
          <month>05</month>
          <volume>40</volume>
          <issue>5</issue>
          <fpage>692</fpage>
          <lpage>702</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35102292"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41587-021-01145-6</pub-id>
          <pub-id pub-id-type="medline">35102292</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41587-021-01145-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC9110295</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <article-title>Neo4j Desktop User Interface Guide</article-title>
          <source>Neo4j, Inc</source>
          <access-date>2022-11-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://neo4j.com/developer/neo4j-desktop/">https://neo4j.com/developer/neo4j-desktop/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rotmensch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Halpern</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tlimat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Horng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Learning a Health Knowledge Graph from Electronic Medical Records</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <month>07</month>
          <day>20</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>5994</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-017-05778-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-017-05778-z</pub-id>
          <pub-id pub-id-type="medline">28729710</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-017-05778-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC5519723</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schrodt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dudchenko</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Knaup-Gregori</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ganzinger</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Graph-Representation of Patient Data: a Systematic Literature Review</article-title>
          <source>J Med Syst</source>
          <year>2020</year>
          <month>03</month>
          <day>12</day>
          <volume>44</volume>
          <issue>4</issue>
          <fpage>86</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32166501"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10916-020-1538-4</pub-id>
          <pub-id pub-id-type="medline">32166501</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10916-020-1538-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC7067737</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tuck</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A cancer graph: a lung cancer property graph database in Neo4j</article-title>
          <source>BMC Res Notes</source>
          <year>2022</year>
          <month>02</month>
          <day>14</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>45</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcresnotes.biomedcentral.com/articles/10.1186/s13104-022-05912-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13104-022-05912-9</pub-id>
          <pub-id pub-id-type="medline">35164854</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13104-022-05912-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC8842806</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <month>01</month>
          <day>01</day>
          <volume>32</volume>
          <issue>Database issue</issue>
          <fpage>D267</fpage>
          <lpage>D270</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/14681409"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
          <pub-id pub-id-type="medline">14681409</pub-id>
          <pub-id pub-id-type="pii">32/suppl_1/D267</pub-id>
          <pub-id pub-id-type="pmcid">PMC308795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Otegi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Soroa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agirre</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Improving search over Electronic Health Records using UMLS-based query expansion through random walks</article-title>
          <source>J Biomed Inform</source>
          <year>2014</year>
          <month>10</month>
          <volume>51</volume>
          <fpage>100</fpage>
          <lpage>106</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00098-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.04.013</pub-id>
          <pub-id pub-id-type="medline">24768598</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00098-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <article-title>Signs and symptoms of lung cancer</article-title>
          <source>American Cancer Society</source>
          <access-date>2022-11-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cancer.org/cancer/lung-cancer/detection-diagnosis-staging/signs-symptoms.html">https://www.cancer.org/cancer/lung-cancer/detection-diagnosis-staging/signs-symptoms.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>What are the risk factors for lung cancer?</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2022-11-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/cancer/lung/basic_info/risk_factors.htm">https://www.cdc.gov/cancer/lung/basic_info/risk_factors.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lv</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
            <collab>China Kadoorie Biobank Collaborative Group</collab>
          </person-group>
          <article-title>Associations Between Hepatitis B Virus Infection and Risk of All Cancer Types</article-title>
          <source>JAMA Netw Open</source>
          <year>2019</year>
          <month>06</month>
          <day>05</day>
          <volume>2</volume>
          <issue>6</issue>
          <fpage>e195718</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2019.5718"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.5718</pub-id>
          <pub-id pub-id-type="medline">31199446</pub-id>
          <pub-id pub-id-type="pii">2735760</pub-id>
          <pub-id pub-id-type="pmcid">PMC6575146</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gridelli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Carbone</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Guarize</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Karachaliou</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mok</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Petrella</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Spaggiari</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rosell</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Non-small-cell lung cancer</article-title>
          <source>Nat Rev Dis Primers</source>
          <year>2015</year>
          <month>05</month>
          <day>21</day>
          <volume>1</volume>
          <fpage>15009</fpage>
          <pub-id pub-id-type="doi">10.1038/nrdp.2015.9</pub-id>
          <pub-id pub-id-type="medline">27188576</pub-id>
          <pub-id pub-id-type="pii">nrdp20159</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferraz Gonçalves</surname>
              <given-names>José António</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rema</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Magalhães</surname>
              <given-names>Miguel</given-names>
            </name>
            <name name-style="western">
              <surname>Esperança</surname>
              <given-names>Ana</given-names>
            </name>
            <name name-style="western">
              <surname>Sousa</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Hypocalcemia in cancer patients: An exploratory study</article-title>
          <source>Porto Biomed J</source>
          <year>2019</year>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e45</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31930179"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/j.pbj.0000000000000045</pub-id>
          <pub-id pub-id-type="medline">31930179</pub-id>
          <pub-id pub-id-type="pii">PBJ-D-18-00028</pub-id>
          <pub-id pub-id-type="pmcid">PMC6924975</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>You</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lange</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Association between dietary sodium, potassium intake and lung cancer risk: evidence from the prostate, lung, colorectal and ovarian cancer screening trial and the Women's Health Initiative</article-title>
          <source>Transl Lung Cancer Res</source>
          <year>2021</year>
          <month>01</month>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>45</fpage>
          <lpage>56</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.21037/tlcr-20-870"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/tlcr-20-870</pub-id>
          <pub-id pub-id-type="medline">33569292</pub-id>
          <pub-id pub-id-type="pii">tlcr-10-01-45</pub-id>
          <pub-id pub-id-type="pmcid">PMC7867772</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dreyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Prescott</surname>
              <given-names>Eva</given-names>
            </name>
            <name name-style="western">
              <surname>Gyntelberg</surname>
              <given-names>Finn</given-names>
            </name>
          </person-group>
          <article-title>Association between atherosclerosis and female lung cancer—a Danish cohort study</article-title>
          <source>Lung Cancer</source>
          <year>2003</year>
          <month>12</month>
          <volume>42</volume>
          <issue>3</issue>
          <fpage>247</fpage>
          <lpage>254</lpage>
          <pub-id pub-id-type="doi">10.1016/s0169-5002(03)00295-2</pub-id>
          <pub-id pub-id-type="medline">14644511</pub-id>
          <pub-id pub-id-type="pii">S0169500203002952</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>SW</given-names>
            </name>
          </person-group>
          <article-title>Superoxide dismutase 2 gene and cancer risk: evidence from an updated meta-analysis</article-title>
          <source>Int J Clin Exp Med</source>
          <year>2015</year>
          <volume>8</volume>
          <issue>9</issue>
          <fpage>14647</fpage>
          <lpage>14655</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26628947"/>
          </comment>
          <pub-id pub-id-type="medline">26628947</pub-id>
          <pub-id pub-id-type="pmcid">PMC4658836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Unsal</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Atalay</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Atikcan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yilmaz</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Prognostic significance of hemostatic parameters in patients with lung cancer</article-title>
          <source>Respir Med</source>
          <year>2004</year>
          <month>02</month>
          <volume>98</volume>
          <issue>2</issue>
          <fpage>93</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0954-6111(03)00261-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.rmed.2003.07.001</pub-id>
          <pub-id pub-id-type="medline">14971870</pub-id>
          <pub-id pub-id-type="pii">S0954-6111(03)00261-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ercan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Mauracher</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Grilz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hell</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hellinger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schmid</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Moik</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ay</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pabinger</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zellner</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Alterations of the Platelet Proteome in Lung Cancer: Accelerated F13A1 and ER Processing as New Actors in Hypercoagulability</article-title>
          <source>Cancers (Basel)</source>
          <year>2021</year>
          <month>05</month>
          <day>08</day>
          <volume>13</volume>
          <issue>9</issue>
          <fpage>2260</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=cancers13092260"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/cancers13092260</pub-id>
          <pub-id pub-id-type="medline">34066760</pub-id>
          <pub-id pub-id-type="pii">cancers13092260</pub-id>
          <pub-id pub-id-type="pmcid">PMC8125802</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yun</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Platelet distribution width correlates with prognosis of non-small cell lung cancer</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <month>06</month>
          <day>14</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>3456</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-017-03772-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-017-03772-z</pub-id>
          <pub-id pub-id-type="medline">28615714</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-017-03772-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC5471191</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Diagnostic accuracy of human epididymis secretory protein 4 for lung cancer: a systematic review and meta-analysis</article-title>
          <source>J Thorac Dis</source>
          <year>2019</year>
          <month>07</month>
          <volume>11</volume>
          <issue>7</issue>
          <fpage>2737</fpage>
          <lpage>2744</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.21037/jtd.2019.06.72"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/jtd.2019.06.72</pub-id>
          <pub-id pub-id-type="medline">31463101</pub-id>
          <pub-id pub-id-type="pii">jtd-11-07-2737</pub-id>
          <pub-id pub-id-type="pmcid">PMC6687986</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmidt-Hansen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Berendse</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Baldwin</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>Lung cancer in symptomatic patients presenting in primary care: a systematic review of risk prediction tools</article-title>
          <source>Br J Gen Pract</source>
          <year>2017</year>
          <month>06</month>
          <volume>67</volume>
          <issue>659</issue>
          <fpage>e396</fpage>
          <lpage>e404</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bjgp.org/cgi/pmidlookup?view=long&amp;pmid=28483820"/>
          </comment>
          <pub-id pub-id-type="doi">10.3399/bjgp17X690917</pub-id>
          <pub-id pub-id-type="medline">28483820</pub-id>
          <pub-id pub-id-type="pii">bjgp17X690917</pub-id>
          <pub-id pub-id-type="pmcid">PMC5442955</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Helby</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bojesen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nielsen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nordestgaard</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>IgE and risk of cancer in 37 747 individuals from the general population</article-title>
          <source>Ann Oncol</source>
          <year>2015</year>
          <month>08</month>
          <volume>26</volume>
          <issue>8</issue>
          <fpage>1784</fpage>
          <lpage>90</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0923-7534(19)31858-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/annonc/mdv231</pub-id>
          <pub-id pub-id-type="medline">25969367</pub-id>
          <pub-id pub-id-type="pii">S0923-7534(19)31858-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sinha</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Prostate cancer management with lifestyle intervention: From knowledge graph to Chatbot</article-title>
          <source>Clinical and Translational Discovery</source>
          <year>2022</year>
          <month>02</month>
          <day>20</day>
          <volume>2</volume>
          <issue>1</issue>
          <pub-id pub-id-type="doi">10.1002/ctd2.29</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stampfer</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ridker</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Dzau</surname>
              <given-names>VJ</given-names>
            </name>
          </person-group>
          <article-title>Risk factor criteria</article-title>
          <source>Circulation</source>
          <year>2004</year>
          <month>06</month>
          <day>29</day>
          <volume>109</volume>
          <issue>25 Suppl 1</issue>
          <fpage>IV3</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1161/01.CIR.0000133446.69171.7d</pub-id>
          <pub-id pub-id-type="medline">15226245</pub-id>
          <pub-id pub-id-type="pii">109/25_suppl_1/IV-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kantor</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Signorello</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Allergies and Asthma in Relation to Cancer Risk</article-title>
          <source>Cancer Epidemiol Biomarkers Prev</source>
          <year>2019</year>
          <month>08</month>
          <volume>28</volume>
          <issue>8</issue>
          <fpage>1395</fpage>
          <lpage>1403</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31167755"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/1055-9965.EPI-18-1330</pub-id>
          <pub-id pub-id-type="medline">31167755</pub-id>
          <pub-id pub-id-type="pii">1055-9965.EPI-18-1330</pub-id>
          <pub-id pub-id-type="pmcid">PMC6677632</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verheij</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Curcin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Delaney</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>McGilchrist</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Possible Sources of Bias in Primary Care Electronic Health Record Data Use and Reuse</article-title>
          <source>J Med Internet Res</source>
          <year>2018</year>
          <month>05</month>
          <day>29</day>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>e185</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2018/5/e185/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.9134</pub-id>
          <pub-id pub-id-type="medline">29844010</pub-id>
          <pub-id pub-id-type="pii">v20i5e185</pub-id>
          <pub-id pub-id-type="pmcid">PMC5997930</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
