<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="letter" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i9e14944</article-id>
      <article-id pub-id-type="pmid">32930665</article-id>
      <article-id pub-id-type="doi">10.2196/14944</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Letter to the Editor</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Letter to the Editor</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Comment on “Prediction of the 1-Year Risk of Incident Lung Cancer: Prospective Study Using Electronic Health Records from the State of Maine”</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Moorhead</surname>
            <given-names>Anne</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ling</surname>
            <given-names>Xuefeng</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Liao</surname>
            <given-names>Jiayu</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Rahmani</surname>
            <given-names>Jamal</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8002-7021</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Karimi</surname>
            <given-names>Roya</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4746-7575</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Khani</surname>
            <given-names>Yousef</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9080-5862</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Sabour</surname>
            <given-names>Siamak</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Clinical Epidemiology</institution>
            <institution>School of Health and Safety</institution>
            <institution>Shahid Beheshti University of Medical Sciences</institution>
            <addr-line>Chamran Highway, Velenjak, Daneshjoo Blvd</addr-line>
            <addr-line>Tehran, 198353-5511</addr-line>
            <country>Iran</country>
            <phone>98 2122421814</phone>
            <email>s.sabour@sbmu.ac.ir</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1928-992X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Clinical Epidemiology</institution>
        <institution>School of Health and Safety</institution>
        <institution>Shahid Beheshti University of Medical Sciences</institution>
        <addr-line>Tehran</addr-line>
        <country>Iran</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Safety Promotions and Injury Prevention Research Centre</institution>
        <institution>Shahid Beheshti University of Medical Sciences</institution>
        <addr-line>Tehran</addr-line>
        <country>Iran</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Siamak Sabour <email>s.sabour@sbmu.ac.ir</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>9</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>9</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>9</issue>
      <elocation-id>e14944</elocation-id>
      <history>
        <date date-type="received">
          <day>5</day>
          <month>6</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>6</day>
          <month>8</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>8</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>17</day>
          <month>9</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Jamal Rahmani, Roya Karimi, Yousef Khani, Siamak Sabour. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 15.09.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2020/9/e14944" xlink:type="simple"/>
      <related-article related-article-type="commentary-article" id="v21i5e13260" ext-link-type="doi" xlink:href="10.2196/13260" vol="21" page="e13260" xlink:type="simple">https://www.jmir.org/2019/5/e13260/</related-article>
      <kwd-group>
        <kwd>prediction</kwd>
        <kwd>area under the curve</kwd>
        <kwd>AUC</kwd>
        <kwd>lung cancer</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <p>We read the recent article by Wang et al [<xref ref-type="bibr" rid="ref1">1</xref>] with great interest. This paper was published in 2019 in the <italic>Journal of Medical Internet Research</italic>. The authors aimed to develop and validate a prospective risk prediction model to identify patients at risk of new incident lung cancer within the next 1 year in the general population. They used data from individual patient electronic health records (EHRs), which were extracted from the Maine Health Information Exchange network. The Extreme Gradient Boosting (XGBoost) algorithm was adopted to build the model, and the authors reported an area under the curve (AUC) of 0.88 (95% CI 0.87-0.88) for their model validation, according to prospective cohort data. Finally, the authors concluded that their model was able to identify statewide, high-risk patients.</p>
    <p>Risk prediction models are useful because of their role in decision making. However, there are some methodological comments that we would like to make. First, AUC is an appropriate measure for assessing discrimination. Discrimination is defined as the ability to distinguish events versus nonevents. However, it assumes that two persons are randomly selected—one who will develop the disease and one who will not. AUC assigns a higher probability of an outcome to the one who will develop the disease. A c-index value of 0.5 expresses a random chance; however, the usual c-index for a prediction model is 0.60 to 0.85. This range can vary under different conditions. What we should always consider about the AUC measure is that a high value of AUC indicates excellent discrimination, but it can also reflect a situation with limited relevance. This situation might arise because the variable is related to the diagnosis or early onset of the disease instead of prediction [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Furthermore, the receiver operating characteristic (ROC) would be a good tool for binary classification, but it is not instrumental for risk stratification. For risk stratification (low- and high-risk bins), the sensitivity in low and high specificity, and positive predictive value (PPV) in high-risk bins, are more discriminating parameters for the ability of the algorithm.</p>
    <p>Second, there are several types of external validation such as validation in more recent patients (temporal validation), in other places (geographic validation), or by other investigators at other sites (fully independent validation). Having two exemplary data sets with huge sample sizes, it would be advisable to test the above-mentioned types of external validity. Moreover, internal validation is a necessary part of model development. It determines the reproducibility of a developed prediction model for the derivation sample and prevents the over-interpretation of the data. Resampling techniques, such as cross-validation and bootstrapping, can be performed; bootstrap validation, in particular, appears to be the most attractive option for obtaining stable optimism-corrected estimates [<xref ref-type="bibr" rid="ref2">2</xref>]. Furthermore, it is important that the authors add the validation of data production in the real world after deployment, since it would be more revealing due to the unexpected data challenges encountered during real-time usage by clinical providers.</p>
    <p>Third, a mistake that is very common occurs when referring to statistically significant <italic>P</italic> values. A <italic>P</italic> value depends on statistical, instead of clinical, logic; thus, researchers should consider judging outputs based on effect size, rather than <italic>P</italic> value.</p>
    <p>A further common issue is missing data, which can influence the model development. Missing data often follow a nonrandom pattern, where there is an explanation and cause behind it. If all missing values are removed, the cause and explanation will be lost, which may affect the conclusion and the model development. To generate the model, multivariable regression techniques are usually applied with a stepwise approach (backward selection is preferable), and concomitantly checking the Akaike information criterion can help us to decide if the model fits well enough.</p>
    <p>Finally, it is important to investigate the interactions between variables in prediction studies. Developing a model, score, or index without considering interactions among variables may elicit changes to the prediction in the real world and lead to misleading messages [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>].</p>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="other">
        <p><bold>Editorial notice</bold>: The corresponding author of “Prediction of the 1-Year Risk of Incident Lung Cancer: Prospective Study Using Electronic Health Records from the State of Maine” did not respond to our invitation to reply to this commentary.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Xiaofang</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Yan</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>Shiying</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Le</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>Jiayu</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Chengyin</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Minjie</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Oliver</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Modi</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>Ching Ho</given-names>
            </name>
            <name name-style="western">
              <surname>Duong</surname>
              <given-names>Son Q</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Bo</given-names>
            </name>
            <name name-style="western">
              <surname>Alfreds</surname>
              <given-names>Shaun T</given-names>
            </name>
            <name name-style="western">
              <surname>Stearns</surname>
              <given-names>Frank</given-names>
            </name>
            <name name-style="western">
              <surname>Kanov</surname>
              <given-names>Laura</given-names>
            </name>
            <name name-style="western">
              <surname>Sylvester</surname>
              <given-names>Karl G</given-names>
            </name>
            <name name-style="western">
              <surname>Widen</surname>
              <given-names>Eric</given-names>
            </name>
            <name name-style="western">
              <surname>McElhinney</surname>
              <given-names>Doff B</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>Xuefeng B</given-names>
            </name>
          </person-group>
          <article-title>Prediction of the 1-Year Risk of Incident Lung Cancer: Prospective Study Using Electronic Health Records from the State of Maine</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>05</month>
          <day>16</day>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>e13260</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/5/e13260/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13260</pub-id>
          <pub-id pub-id-type="medline">31099339</pub-id>
          <pub-id pub-id-type="pii">v21i5e13260</pub-id>
          <pub-id pub-id-type="pmcid">PMC6542253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Kyunghwa</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Kijun</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Byoung Wook</given-names>
            </name>
          </person-group>
          <article-title>How to Develop, Validate, and Compare Clinical Prediction Models Involving Radiological Parameters: Study Design and Statistical Methods</article-title>
          <source>Korean J Radiol</source>
          <year>2016</year>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>339</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.kjronline.org/DOIx.php?id=10.3348/kjr.2016.17.3.339"/>
          </comment>
          <pub-id pub-id-type="doi">10.3348/kjr.2016.17.3.339</pub-id>
          <pub-id pub-id-type="medline">27134523</pub-id>
          <pub-id pub-id-type="pmcid">PMC4842854</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>Karel G M</given-names>
            </name>
            <name name-style="western">
              <surname>Royston</surname>
              <given-names>Patrick</given-names>
            </name>
            <name name-style="western">
              <surname>Vergouwe</surname>
              <given-names>Yvonne</given-names>
            </name>
            <name name-style="western">
              <surname>Grobbee</surname>
              <given-names>Diederick E</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>Douglas G</given-names>
            </name>
          </person-group>
          <article-title>Prognosis and prognostic research: what, why, and how?</article-title>
          <source>BMJ</source>
          <year>2009</year>
          <month>02</month>
          <day>23</day>
          <volume>338</volume>
          <fpage>b375</fpage>
          <pub-id pub-id-type="doi">10.1136/bmj.b375</pub-id>
          <pub-id pub-id-type="medline">19237405</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sabour</surname>
              <given-names>Siamak</given-names>
            </name>
          </person-group>
          <article-title>Prediction of preterm delivery using levels of VEGF and leptin in amniotic fluid from the second trimester: prediction rules</article-title>
          <source>Arch Gynecol Obstet</source>
          <year>2015</year>
          <month>04</month>
          <volume>291</volume>
          <issue>4</issue>
          <fpage>719</fpage>
          <pub-id pub-id-type="doi">10.1007/s00404-014-3568-y</pub-id>
          <pub-id pub-id-type="medline">25490880</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sabour</surname>
              <given-names>Siamak</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>Fariba</given-names>
            </name>
          </person-group>
          <article-title>Predictive value of confocal scanning laser for the onset of visual field loss</article-title>
          <source>Ophthalmology</source>
          <year>2013</year>
          <month>06</month>
          <volume>120</volume>
          <issue>6</issue>
          <fpage>e31</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ophtha.2013.01.055</pub-id>
          <pub-id pub-id-type="medline">23732064</pub-id>
          <pub-id pub-id-type="pii">S0161-6420(13)00101-2</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
