<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i11e26777</article-id>
      <article-id pub-id-type="pmid">34730546</article-id>
      <article-id pub-id-type="doi">10.2196/26777</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Natural Language Processing and Machine Learning Methods to Characterize Unstructured Patient-Reported Outcomes: Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Reuter</surname>
            <given-names>Katja</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Basu</surname>
            <given-names>Tanmay</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>Zhaohua</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3245-2004</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Sim</surname>
            <given-names>Jin-ah</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3494-3002</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Jade X</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5377-0509</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Forrest</surname>
            <given-names>Christopher B</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1252-068X</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Krull</surname>
            <given-names>Kevin R</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0476-7001</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Srivastava</surname>
            <given-names>Deokumar</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6693-8120</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Hudson</surname>
            <given-names>Melissa M</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6984-2407</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Robison</surname>
            <given-names>Leslie L</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7460-8578</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Baker</surname>
            <given-names>Justin N</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6584-6483</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>I-Chan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Epidemiology and Cancer Control</institution>
            <institution>St. Jude Children's Research Hospital</institution>
            <addr-line>MS 735, 262 Danny Thomas Pl</addr-line>
            <addr-line>Memphis, TN, 38105</addr-line>
            <country>United States</country>
            <phone>1 9015958369</phone>
            <email>I-Chan.Huang@STJUDE.ORG</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1194-3923</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biostatistics</institution>
        <institution>St. Jude Children's Research Hospital</institution>
        <addr-line>Memphis, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Epidemiology and Cancer Control</institution>
        <institution>St. Jude Children's Research Hospital</institution>
        <addr-line>Memphis, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of AI Convergence</institution>
        <institution>Hallym University</institution>
        <addr-line>Chuncheon</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Roberts Center for Pediatric Research</institution>
        <institution>Children's Hospital of Philadelphia</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Oncology</institution>
        <institution>St. Jude Children's Research Hospital</institution>
        <addr-line>Memphis, TN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: I-Chan Huang <email>I-Chan.Huang@STJUDE.ORG</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>3</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>11</issue>
      <elocation-id>e26777</elocation-id>
      <history>
        <date date-type="received">
          <day>25</day>
          <month>12</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>3</day>
          <month>2</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>20</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>12</day>
          <month>8</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Zhaohua Lu, Jin-ah Sim, Jade X Wang, Christopher B Forrest, Kevin R Krull, Deokumar Srivastava, Melissa M Hudson, Leslie L Robison, Justin N Baker, I-Chan Huang. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 03.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/11/e26777" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Assessing patient-reported outcomes (PROs) through interviews or conversations during clinical encounters provides insightful information about survivorship.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to test the validity of natural language processing (NLP) and machine learning (ML) algorithms in identifying different attributes of pain interference and fatigue symptoms experienced by child and adolescent survivors of cancer, using the judgment of PRO content experts as the gold standard for validation.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This cross-sectional study focused on child and adolescent survivors of cancer, aged 8 to 17 years, and caregivers, from whom 391 meaning units in the pain interference domain and 423 in the fatigue domain were generated for analyses. Data were collected from the After Completion of Therapy Clinic at St. Jude Children’s Research Hospital. Experienced pain interference and fatigue symptoms were reported through in-depth interviews. After verbatim transcription, analyzable sentences (ie, meaning units) were semantically labeled by 2 content experts for each attribute (physical, cognitive, social, or unclassified). Two NLP/ML methods were used to extract and validate the semantic features: bidirectional encoder representations from transformers (BERT) and Word2vec plus one of the ML methods, the support vector machine or extreme gradient boosting. Receiver operating characteristic and precision-recall curves were used to evaluate the accuracy and validity of the NLP/ML methods.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Compared with Word2vec/support vector machine and Word2vec/extreme gradient boosting, BERT demonstrated higher accuracy in both symptom domains, with 0.931 (95% CI 0.905-0.957) and 0.916 (95% CI 0.887-0.941) for problems with cognitive and social attributes on pain interference, respectively, and 0.929 (95% CI 0.903-0.953) and 0.917 (95% CI 0.891-0.943) for problems with cognitive and social attributes on fatigue, respectively. In addition, BERT yielded superior areas under the receiver operating characteristic curve for cognitive attributes on pain interference and fatigue domains (0.923, 95% CI 0.879-0.997; 0.948, 95% CI 0.922-0.979) and superior areas under the precision-recall curve for cognitive attributes on pain interference and fatigue domains (0.818, 95% CI 0.735-0.917; 0.855, 95% CI 0.791-0.930).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The BERT method performed better than the other methods. As an alternative to using standard PRO surveys, collecting unstructured PROs via interviews or conversations during clinical encounters and applying NLP/ML methods can facilitate PRO assessment in child and adolescent cancer survivors.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>machine learning</kwd>
        <kwd>PROs</kwd>
        <kwd>pediatric oncology</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Pediatric Cancer and Patient-Reported Outcomes</title>
        <p>Innovative anticancer therapies have significantly improved the 5-year survival rates of pediatric and adolescent patients with cancer in the United States [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. However, toxic treatment often causes long-term sequelae (eg, physical and psychological morbidities and premature mortality [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]), which contribute to poor patient-reported outcomes (PROs) and impaired quality of life [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Poor PROs, such as fatigue, pain, psychological distress, and neurocognitive problems, are prevalent in survivors of cancer aged &#60;18 years [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. Approximately 50% of young survivors of childhood cancer experience severe fatigue [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>] or pain [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], and both can worsen as survivors become older [<xref ref-type="bibr" rid="ref15">15</xref>]. Assessing PROs from survivors and caregivers can complement clinical assessments, suggest potential adverse medical events, and facilitate the provision of appropriate interventions [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      </sec>
      <sec>
        <title>Unstructured PROs</title>
        <p>Conventionally, PROs are collected from childhood survivors of cancer during follow-up care using standard surveys with prespecified content of PROs. Given busy clinic schedules, survivors may be unable or unwilling to complete surveys. Performing interviews or initiating conversations by clinicians are alternative methods of collecting PROs. However, PROs collected by this method are qualitative or unstructured in nature, which requires specific techniques for data processing and analysis. Natural language processing (NLP), a discipline of linguistics, information engineering, and artificial intelligence, initially designed for processing a large amount of natural language data, provides an innovative avenue for PRO research with potential clinical applications [<xref ref-type="bibr" rid="ref18">18</xref>]. However, the validity of applying this method to evaluate PROs in oncology is understudied.</p>
      </sec>
      <sec>
        <title>Application of NLP for PRO Analysis</title>
        <p>NLP techniques have been applied to process unstructured or nonquantitative clinical data in medical notes for classifying or predicting health status (eg, risk of heart disease and stage of cancer) through information extraction, semantic representation learning, and outcome prediction [<xref ref-type="bibr" rid="ref19">19</xref>]. Recently, NLP applications have been extended to unstructured PRO and symptom data stored in electronic medical records (EMRs) [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. A review study [<xref ref-type="bibr" rid="ref22">22</xref>] found that most previous NLP applications for unstructured PRO data largely focused on rule-based classifications (eg, extracting prespecified keywords or phrases from free text to identify cancer-related symptoms [<xref ref-type="bibr" rid="ref23">23</xref>]), followed by machine learning (ML) approach (eg, conditional random field model [<xref ref-type="bibr" rid="ref20">20</xref>], support vector machine [SVM] [<xref ref-type="bibr" rid="ref24">24</xref>], and boosting regression tree [<xref ref-type="bibr" rid="ref25">25</xref>]) to analyze associations with clinical outcomes.</p>
        <p>The method of capturing the features of unstructured PROs is an emerging area of research [<xref ref-type="bibr" rid="ref26">26</xref>]. Compared with rule-based extraction, the ML/deep learning–based NLP methods, including the context-independent or static (eg, term frequency–inverse document frequency [TF-IDF] [<xref ref-type="bibr" rid="ref27">27</xref>], global vectors for word representation [GloVe] [<xref ref-type="bibr" rid="ref28">28</xref>], and Word2vec [<xref ref-type="bibr" rid="ref29">29</xref>]), and context-dependent or dynamic (eg, bidirectional encoder representations from transformers [BERT] [<xref ref-type="bibr" rid="ref30">30</xref>]) distributed representation methods are more suitable for processing unstructured PROs. Typically, context-dependent methods can capture the meaning of polysemous words, which substantially improves the flexibility and validity of analyzing unstructured PRO data.</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>To facilitate clinical decisions, our long-term goal is to collect PROs from survivor-caregiver-clinician conversations and apply NLP/ML methods to characterize meaningful PROs. Through in-depth interviews with childhood survivors of cancer and caregivers, this study evaluates the validity of using different novel NLP/ML methods (Word2vec/ML and BERT) to characterize 2 most common symptom domains (pain interference and fatigue) in child and adolescent survivors of cancer. The interview data were semantically labeled and coded by PRO content experts as the gold standard to represent specific symptom problems (defined as symptom attributes). In contrast to the static methods (ie, Word2vec/ML), we hypothesize that the use of dynamic methods (ie, BERT) would yield superior model performance.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Participants</title>
        <p>Study participants were survivors of pediatric cancer and their caregivers recruited from the After Completion of Therapy Clinic at St. Jude Children’s Research Hospital (<italic>St Jude</italic> hereafter) in Tennessee, United States, between August and December 2016. Eligible participants were identified from a list of survivors scheduled for annual follow-up, and their eligibility was confirmed through EMRs. We recruited survivors who were aged 8 to 17 years at annual follow-up, at least 2 years off therapy, and at least 5 years from initial cancer diagnosis. We excluded survivors who had acute or life-threatening conditions and required immediate medical care. We recruited caregivers who were the most knowledgeable of the survivor’s health status and could speak or read English. Assent from survivors and consent from caregivers were obtained. The research protocol was approved by the institutional review board of St Jude.</p>
      </sec>
      <sec>
        <title>In-Depth Interview and Data Abstraction</title>
        <p>This investigation builds on our previous study that elucidated the contents of 5 PRO domains (pain interference, fatigue, psychological stress, stigma, and meaning and purpose) related to pediatric cancer from survivors and caregivers [<xref ref-type="bibr" rid="ref15">15</xref>]. We randomly assigned 2 domains to each survivor and 2 to 3 domains to each caregiver. PRO domains were assigned randomly to each survivor and caregiver to elucidate PRO contents from both survivors and caregivers rather than comparing PRO discordances between dyadic participants. Diagnostic and clinical information was abstracted from EMRs. We designed separate interview guides (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref> and <xref ref-type="supplementary-material" rid="app2">2</xref>) with probes for each PRO domain, audio-recorded the interviews, transcribed interviews verbatim, and abstracted meaningful and interpretable sentences (ie, “meaning units”) [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      </sec>
      <sec>
        <title>Expert-Labeled Outcomes as the Gold Standard</title>
        <p>We used the methods developed in our previous studies to code the concepts of symptomatic problems collected from interviews and assigned the concepts to specific attributes [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Specifically, we began with abstracting the sentences or paragraphs collected from the interviews that are relevant to the experiences with particular symptomatic problems, such as presence, frequency, or intensity, and how these symptomatic problems affect daily activities (defined as meaning units) and then mapped the meaning units to analyzable, interpretable formats that represent the contents of items included in the Patient-Reported Outcomes Measurement Information System (PROMIS) banks [<xref ref-type="bibr" rid="ref32">32</xref>] (defined as meaningful concepts). Subsequently, we labeled the meaningful concepts with distinct categories, including physical, cognitive, and social categories (defined as attributes).</p>
        <p>The associations among meaning units, meaningful concepts, and corresponding attributes are illustrated in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. For example, in the pain interference domain, when a survivor stated that “Can’t play, and go outside when I have a headache,” we mapped this meaning unit to the meaningful concept “Hard to do sports or exercise when had pain,” and then labeled this meaningful concept as the <italic>physical</italic> attribute. For the fatigue domain, when a survivor stated that “It’s hard to get my school work done when I’m tired,” we mapped this meaning unit to the meaningful concept “Hard to keep up with schoolwork” and then labeled this meaningful concept as the <italic>cognitive</italic> attribute.</p>
        <p>In addition, 2 PRO content experts (JLC and CMJ) independently reviewed the content of each meaning unit derived from the symptom domains and mapped each meaning unit to the content of individual items listed in the PROMIS pain interference and fatigue item banks [<xref ref-type="bibr" rid="ref32">32</xref>]. In total, 391 and 423 meaning units representing pain interference and fatigue domains, respectively, were included in the analysis, and each meaning unit was labeled and coded as problematic symptoms based on key attributes (physical, cognitive, social, and unclassified). Discrepancies in the mapping process were resolved by consensus between 2 senior investigators (CBF and ICH). PROMIS has applied rigorous standards to develop a comprehensive list of PRO items, therefore serving as a foundation for evaluating PRO contents [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. This mapping process has been adopted in previous research to facilitate the abstraction and mapping of qualitative data [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. In this study, the expert-labeled symptoms attributed to each meaning unit were deemed the gold standard for testing the validity of NLP/ML methods.</p>
        <p>We evaluated the interrater reliability based on the raw concordance rate (defined as the percentage of coded meaning units for which the 2 coders provided concordant ratings) and the Cohen κ statistic (defined as the observed agreement between the 2 coders after correcting for the agreement that is expected by chance). In our study, raw concordance rates were 88% for the pain interference domain and 86% for the fatigue domain. The Cohen κ statistic was 0.6 for both domains, which is considered moderate or good reliability for coding qualitative PRO data [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
      </sec>
      <sec>
        <title>NLP/ML Pipeline</title>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> outlines the pipeline of NLP/ML methods consisting of 2 key components: (1) extracting semantic features from the unstructured PROs and (2) using expert-labeled attributes of symptoms to validate NLP/ML–generated semantic features. We used the Word2vec [<xref ref-type="bibr" rid="ref29">29</xref>] and BERT [<xref ref-type="bibr" rid="ref30">30</xref>] methods to create multivariate semantic features (ie, word vectors) for each word from the meaning units. The BERT method embeds deep neural networks as a single step to perform abstraction and validation for the semantic features of symptom data simultaneously, whereas Word2vec/ML techniques involve 2 separate steps to achieve these tasks (<xref rid="figure1" ref-type="fig">Figure 1</xref> and <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The natural language processing and machine learning pipeline to analyze unstructured patient-reported outcomes data. BERT: bidirectional encoder representations from transformers; PROs: patient-reported outcomes; SVM: support vector machine; XGBoost: extreme gradient boosting.</p>
          </caption>
          <graphic xlink:href="jmir_v23i11e26777_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>BERT (Base, Uncased) for PRO Feature Extraction and Validation</title>
        <p>The BERT (base, uncased; or the <italic>BERT</italic> hereafter), our primary interest in the NLP method, consists of the multilayer neural networks known as encoder transformers, and each generates context-dependent word features by weighting the features of each word with the other words in the meaning units [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. We used 12 stacked layers of encoders to explore phrase-level, syntactic, semantic, and contextual information [<xref ref-type="bibr" rid="ref42">42</xref>]. Specifically, we used the semantic features pretrained by articles published in BooksCorpus and Wikipedia to generate general word semantic meanings (pretrained model in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]). The BERT model is augmented with a classification component, consisting of a feed-forward neural network and a softmax layer [<xref ref-type="bibr" rid="ref44">44</xref>] to classify unstructured PROs (fine-tuning process in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendices 5</xref> and <xref ref-type="supplementary-material" rid="app6">6</xref>). This augmented model was fine-tuned by the meaning units collected from interviews, which adapts the sentence contextual representation in encoders to the symptom-related contexts, and the parameters in the classification component were estimated simultaneously in one step.</p>
        <p>Specifically, we used the pretrained model (BERT [base, uncased]) from the Hugging Face model repository, which is a PyTorch implementation of the base BERT model [<xref ref-type="bibr" rid="ref30">30</xref>]. The pretrained model is essentially based on the text passages included in BooksCorpus [<xref ref-type="bibr" rid="ref43">43</xref>] and the English Wikipedia [<xref ref-type="bibr" rid="ref30">30</xref>]. The weight parameters in the pretrained BERT model were further fine-tuned with the texts in the meaning units from our interview data when the BERT model was used for the downstream classification task of the meaning units through the BertForSequenceClassification object in the pytorch_transformers module. The use of BooksCorpus and Wikipedia is appropriate for our survivors of pediatric cancer as both contain comprehensive generic terms that capture the heterogeneous health status experienced by varying survivors of cancer, ranging from healthy (no late effects and no symptoms) to ill (severe late effects with severe symptoms).</p>
      </sec>
      <sec>
        <title>Word2vec Method for PRO Feature Extraction and ML for Validation</title>
        <p>We used Word2vec, our secondary interest in the NLP method, to extract semantic features based on the similarity of words in meaning units. Embedded with a one-level neural network model (<xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>), Word2vec defines the semantic similarity across different words by using a specific word to search and connect other words nearby, given the hypothesis that a word’s meaning is given by adjacent words [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. We adopted the semantic features already pretrained by English articles from Wikipedia [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>] to generate and fine-tune the semantic meanings of the meaning units through our data (<xref rid="figure1" ref-type="fig">Figure 1</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendices 6</xref> and <xref ref-type="supplementary-material" rid="app7">7</xref>).</p>
        <p>We used 2 ML methods, including the extreme gradient boosting (XGBoost) [<xref ref-type="bibr" rid="ref25">25</xref>] and the SVM [<xref ref-type="bibr" rid="ref24">24</xref>], to validate the semantic features derived from Word2vec in associations with the expert-labeled symptom attributes. ML modeling was used to account for high dimensional structures of semantic features created by Word2vec [<xref ref-type="bibr" rid="ref29">29</xref>] (<xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>). Specifically, XGBoost is a robust regression tree approach that includes multiple simple decision trees to iteratively refine the model performance by minimizing the difference between the expected and expert-labeled outcomes. In contrast, SVM is a classical ML algorithm that aims to find a decision boundary to separate the semantic features corresponding to the expert-labeled attributes by minimizing classification errors.</p>
      </sec>
      <sec>
        <title>Alternative Methods for PRO Feature Extraction</title>
        <p>In addition to the BERT, Word2vec/SVM, and Word2vec/XGBoost models, we conducted pilot analyses to evaluate 6 alternative NLP/ML models, including the TF-IDF/SVM, GloVe/SVM, and GloVe/XGBoost, as well as 3 extended BERT models (BioBERT, BlueBERT, and Clinical BERT). Briefly, the TF-IDF is an automatic text analysis that accounts for the number of times a word appears in a document and the number of documents that contain the word [<xref ref-type="bibr" rid="ref27">27</xref>]. The GloVe method identifies the global word similarity over several meaning units (ie, our unit of analysis) or the entire interview [<xref ref-type="bibr" rid="ref28">28</xref>]. The 3 alternative BERT models for pilot testing included the BioBERT (base, cased and trained on PubMed 1M) [<xref ref-type="bibr" rid="ref49">49</xref>], BlueBERT (base, uncased and trained on PubMed) [<xref ref-type="bibr" rid="ref50">50</xref>], and Clinical BERT (base, cased, initialized from BioBERT and trained on all MIMIC-III notes) [<xref ref-type="bibr" rid="ref51">51</xref>].</p>
        <p>As demonstrated in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>, the areas under the precision-recall (PR) curves for the BERT model were significantly superior to the TF-IDF/SVM, GloVe/SVM, and GloVe/XGBoost (all attributes over 2 symptom domains) and were significantly superior to the BioBERT, BlueBERT, and Clinical BERT models (especially physical and cognitive attributes in the pain interference domain). In addition, the use of GloVe/SVM, Word2vec/SVM, and Word2vec/XGBoost methods resulted in statistically nonsignificant differences. Model performances based on other evaluation metrics were reported in <xref ref-type="supplementary-material" rid="app9">Multimedia Appendices 9</xref> and <xref ref-type="supplementary-material" rid="app10">10</xref>. As the main purpose of this study was to identify the NLP/ML model with optimal performance for symptom assessment, we focused on comparisons between the BERT model (as a theoretically optimal method) and the Word2vec model accompanied by SVM and XGBoost (as a suboptimal method).</p>
      </sec>
      <sec>
        <title>Model Training and Evaluation</title>
        <p>We used a 5-fold nested cross-validation approach (<xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>) to address the issue of small sample size, including the components of partitioning the training, validation and test sets, determining the tuning parameters in ML methods, and generating validation results. Given the 4-attribute classification (physical, cognitive, social, and unclassified) on each meaning unit, we used a one-versus-rest binary classifier to classify one attribute (physical, cognitive, or social) versus the remaining attributes (the reference) for model training and evaluation [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
        <p>We used standard metrics to test the validity of NLP/ML models, including precision (ie, positive predictive value), sensitivity (ie, recall), specificity, accuracy (summarizing true positive and true negative), F1 score (summarizing sensitivity and positive predictive values), areas under the receiver operating characteristic (ROC) curve, and areas under the PR curve. In the case of imbalanced data (ie, a limited number of meaning units labeled as attribute presence versus that of the reference), the PR curve is more suitable than the ROC curve as the former focuses on precision and sensitivity related to true positive cases [<xref ref-type="bibr" rid="ref53">53</xref>]. On the basis of a recommendation [<xref ref-type="bibr" rid="ref53">53</xref>], we determined the baseline threshold for each attribute of a symptom domain as the percentage of meaning units that were rated by 2 coders or content experts (ie, the gold standard for labeling true presence of attribute), which represents the precision of a random guess classifier.</p>
        <p>Our NLP framework benefits from the transfer learning framework, which uses a huge amount of related data in the public domains to improve the ML application with regular sample sizes. Specifically, our Word2vec and BERT models or algorithms were pretrained by millions of health-related information in the public domains (eg, Wikipedia). Our meaning units were only used to fine-tune or improve the pretrained model and as predictive samples. Although our sample size was not large, it was sufficient to achieve robust validation and predictive performance. The codes used for BERT modeling are available on the GitHub website [<xref ref-type="bibr" rid="ref54">54</xref>]; the fully deidentified unstructured PRO data used in this study can be shared for research purposes on user’s request.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Participant Characteristics</title>
        <p><xref ref-type="table" rid="table1">Table 1</xref> reports the participant characteristics. The mean (SD) ages of survivors (N=52) and caregivers (N=35) at interviews were 13.8 (2.8) and 39.6 (7.0) years, respectively. Approximately 42% (22/52) of survivors were treated for noncentral nervous system solid tumors and 33% (17/52) for leukemia. For meaning units, 391 in the pain interference domain—of the 391 units, 255 (65.2%) were from survivors, and 136 (34.8%) were from caregivers—and 423 in the fatigue domain—of the 423 units, 275 (65%) were from survivors, and 148 (35%) were from caregivers—were labeled and analyzed accordingly (<xref ref-type="supplementary-material" rid="app12">Multimedia Appendix 12</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Characteristics of study participants (N=87).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="490"/>
            <col width="0"/>
            <col width="340"/>
            <col width="0"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristics</td>
                <td colspan="2">Survivors (n=52)</td>
                <td>Caregivers (n=35)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Age at evaluation (years), mean (SD)</td>
                <td colspan="2">13.8 (2.8)</td>
                <td>39.6 (7.0)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Sex, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td colspan="2">31 (61)</td>
                <td colspan="2">32 (91)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td colspan="2">20 (39)</td>
                <td colspan="2">3 (9)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Race or ethnicity, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>White, non-Hispanic</td>
                <td colspan="2">30 (59)</td>
                <td colspan="2">24 (69)</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Black, non-Hispanic</td>
                <td colspan="2">14 (28)</td>
                <td colspan="2">10 (29)</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Other</td>
                <td colspan="2">7 (14)</td>
                <td colspan="2">1 (3.0)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Cancer diagnosis, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-CNS<sup>a</sup> solid tumor</td>
                <td colspan="2">22 (42)</td>
                <td colspan="2">N/A<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Leukemia</td>
                <td colspan="2">17 (33)</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CNS malignancy</td>
                <td colspan="2">9 (17)</td>
                <td colspan="2">N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Lymphoma</td>
                <td colspan="2">4 (8.0)</td>
                <td colspan="2">N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>CNS: central nervous system.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Sensitivity, Specificity, Precision, and Accuracy for Pain Interference</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> reports the model performance for the pain interference domain based on survivor and caregiver data. For the sensitivity metric, compared with Word2vec/SVM and Word2vec/XGBoost, BERT generated higher values in identifying problems with 3 attributes (physical, cognitive, and social); however, the values were largely &#60;0.6. In contrast, all 3 methods produced specificity of &#62;0.9, and Word2vec/XGBoost produced higher values in identifying problems with 3 attributes compared with BERT and Word2vec/SVM. For F1-statistics, BERT yielded higher values for all 3 attributes compared with Word2vec/SVM and Word2vec/XGBoost. BERT yielded higher accuracy for all 3 attributes compared with Word2vec/SVM and Word2vec/XGBoost; the values were all &#62;0.8, specifically 0.931 (95% CI 0.905-0.957), 0.916 (95% CI 0.887-0.941), and 0.870 (95% CI 0.836-0.903) for cognitive, social, and physical attributes, respectively.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Performance of natural language processing/machine learning models for pain interference domain by 3 symptom attributes.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="180"/>
            <col width="100"/>
            <col width="100"/>
            <col width="90"/>
            <col width="130"/>
            <col width="110"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Attributes and models</td>
                <td>Precision (95% CI)</td>
                <td>Sensitivity (95% CI)</td>
                <td>Specificity (95% CI)</td>
                <td>Accuracy (95% CI)</td>
                <td>F1 (95% CI)</td>
                <td>AUROCC<sup>a</sup> (95% CI)</td>
                <td>AUPRC<sup>b</sup> (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="9">
                  <bold>Physical</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT<sup>c</sup></td>
                <td>0.692 (0.555-0.811)</td>
                <td>0.507 (0.387-0.618)</td>
                <td>0.950 (0.924-0.972)</td>
                <td>0.870 (0.836-0.903)</td>
                <td>0.585 (0.467-0.683)</td>
                <td>0.875 (0.824-0.948)</td>
                <td>0.677 (0.568-0.770)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/SVM<sup>d</sup></td>
                <td>0.722 (0.562-0.867)</td>
                <td>0.366 (0.262-0.479)</td>
                <td>0.969 (0.948-0.987)</td>
                <td>0.859 (0.824-0.893)</td>
                <td>0.486 (0.362-0.594)</td>
                <td>0.868 (0.826-0.922)</td>
                <td>0.623 (0.509-0.743)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/XGBoost<sup>e</sup></td>
                <td>0.697 (0.528-0.857)</td>
                <td>0.324 (0.221-0.435)</td>
                <td>0.969 (0.949-0.987)</td>
                <td>0.852 (0.813-0.887)</td>
                <td>0.442 (0.318-0.551)</td>
                <td>0.830 (0.769-0.888)</td>
                <td>0.553 (0.437-0.659)</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Cognitive</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT</td>
                <td>0.800 (0.657-0.935)</td>
                <td>0.583 (0.432-0.735)</td>
                <td>0.980 (0.964-0.994)</td>
                <td>0.931 (0.905-0.957)</td>
                <td>0.675 (0.543-0.779)</td>
                <td>0.923 (0.879-0.997)</td>
                <td>0.818 (0.735-0.917)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/SVM</td>
                <td>0.760 (0.583-0.920)</td>
                <td>0.396 (0.254-0.533)</td>
                <td>0.983 (0.967-0.994)</td>
                <td>0.910 (0.882-0.939)</td>
                <td>0.521 (0.361-0.648)</td>
                <td>0.900 (0.863-0.957)</td>
                <td>0.609 (0.434-0.761)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/XGBoost</td>
                <td>0.769 (0.500-1.000)</td>
                <td>0.208 (0.104-0.333)</td>
                <td>0.991 (0.980-1.000)</td>
                <td>0.895 (0.867-0.926)</td>
                <td>0.328 (0.178-0.474)</td>
                <td>0.828 (0.748-0.905)</td>
                <td>0.474 (0.321-0.630)</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Social</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT</td>
                <td>0.636 (0.461-0.800)</td>
                <td>0.500 (0.349-0.652)</td>
                <td>0.966 (0.946-0.983)</td>
                <td>0.916 (0.887-0.941)</td>
                <td>0.560 (0.410-0.690)</td>
                <td>0.857 (0.786-0.918)</td>
                <td>0.566 (0.402-0.750)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/SVM</td>
                <td>0.286 (0-0.668)</td>
                <td>0.048 (0-0.118)</td>
                <td>0.986 (0.973-0.997)</td>
                <td>0.885 (0.854-0.916)</td>
                <td>0.082 (0.035-0.200)</td>
                <td>0.804 (0.742-0.878)</td>
                <td>0.309 (0.173-0.426)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/XGBoost</td>
                <td>0.556 (0.222-0.875)</td>
                <td>0.119 (0.029-0.229)</td>
                <td>0.989 (0.977-0.997)</td>
                <td>0.895 (0.864-0.923)</td>
                <td>0.196 (0.072-0.343)</td>
                <td>0.786 (0.728-0.850)</td>
                <td>0.304 (0.148-0.420)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>AUROCC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>AUPRC: area under precision-recall curve.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>BERT: bidirectional encoder representations from transformers.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>XGBoost: extreme gradient boosting.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Sensitivity, Specificity, Precision, and Accuracy for Fatigue</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> reports the model performance for the fatigue domain based on the survivor and caregiver data. For sensitivity, the BERT method generated higher values in identifying problems with 3 attributes compared with Word2vec/SVM and Word2vec/XGBoost; however, the values were largely &#60;0.5, except cognitive attributes (0.757). In contrast, all 3 methods produced specificity &#62;0.9, and Word2vec/SVM produced higher values in identifying problems with 3 attributes compared with BERT and Word2vec/XGBoost. The BERT model yielded higher F1-statistics for all 3 individual attributes compared with Word2vec/SVM and Word2vec/XGBoost. In addition, the BERT model produced higher accuracy for all 3 attributes compared with Word2vec/SVM and Word2vec/XGBoost; the values were all &#62;0.8, specifically 0.929 (95% CI 0.903-0.953), 0.917 (95% CI 0.891-0.943), and 0.832 (95% CI 0.794-0.867) for cognitive, social, and physical attributes, respectively.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance of natural language processing/machine learning models for fatigue domain by 3 symptom attributes.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="180"/>
            <col width="100"/>
            <col width="100"/>
            <col width="90"/>
            <col width="130"/>
            <col width="110"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Attributes and models</td>
                <td>Precision (95% CI)</td>
                <td>Sensitivity (95% CI)</td>
                <td>Specificity (95% CI)</td>
                <td>Accuracy (95% CI)</td>
                <td>F1 (95% CI)</td>
                <td>AUROCC<sup>a</sup> (95% CI)</td>
                <td>AUPRC<sup>b</sup> (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="9">
                  <bold>Physical</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT<sup>c</sup></td>
                <td>0.593 (0.468-0.717)</td>
                <td>0.427 (0.315-0.538)</td>
                <td>0.929 (0.901-0.956)</td>
                <td>0.832 (0.794-0.867)</td>
                <td>0.496 (0.384-0.593)</td>
                <td>0.775 (0.723-0.848)</td>
                <td>0.537 (0.443-0.634)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/SVM<sup>d</sup></td>
                <td>0.600 (0.286-0.900)</td>
                <td>0.073 (0.026-0.136)</td>
                <td>0.988 (0.974-0.997)</td>
                <td>0.810 (0.770-0.848)</td>
                <td>0.130 (0.048-0.227)</td>
                <td>0.726 (0.670-0.780)</td>
                <td>0.375 (0.224-0.474)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/XGBoost<sup>e</sup></td>
                <td>0.595 (0.432-0.773)</td>
                <td>0.268 (0.169-0.364)</td>
                <td>0.956 (0.934-0.977)</td>
                <td>0.822 (0.784-0.858)</td>
                <td>0.370 (0.250-0.474)</td>
                <td>0.726 (0.665-0.798)</td>
                <td>0.461 (0.338-0.575)</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Cognitive</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT</td>
                <td>0.803 (0.696-0.895)</td>
                <td>0.757 (0.652-0.854)</td>
                <td>0.963 (0.941-0.981)</td>
                <td>0.929 (0.903-0.953)</td>
                <td>0.779 (0.697-0.855)</td>
                <td>0.948 (0.922-0.979)</td>
                <td>0.855 (0.791-0.930)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/SVM</td>
                <td>0.829 (0.690-0.946)</td>
                <td>0.414 (0.292-0.535)</td>
                <td>0.983 (0.968-0.994)</td>
                <td>0.889 (0.861-0.917)</td>
                <td>0.552 (0.418-0.657)</td>
                <td>0.917 (0.886-0.951)</td>
                <td>0.730 (0.632-0.855)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/XGBoost</td>
                <td>0.767 (0.625-0.884)</td>
                <td>0.471 (0.359-0.586)</td>
                <td>0.972 (0.953-0.988)</td>
                <td>0.889 (0.858-0.917)</td>
                <td>0.584 (0.468-0.684)</td>
                <td>0.860 (0.817-0.924)</td>
                <td>0.659 (0.550-0.782)</td>
              </tr>
              <tr valign="top">
                <td colspan="9">
                  <bold>Social</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT</td>
                <td>0.679 (0.500-0.848)</td>
                <td>0.422 (0.289-0.568)</td>
                <td>0.976 (0.960-0.990)</td>
                <td>0.917 (0.891-0.943)</td>
                <td>0.521 (0.379-0.658)</td>
                <td>0.796 (0.704-0.912)</td>
                <td>0.561 (0.434-0.741)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/SVM</td>
                <td>0.778 (0.429-1.000)</td>
                <td>0.156 (0.057-0.267)</td>
                <td>0.995 (0.987-1.000)</td>
                <td>0.905 (0.877-0.929)</td>
                <td>0.259 (0.102-0.406)</td>
                <td>0.817 (0.756-0.881)</td>
                <td>0.393 (0.203-0.534)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Word2vec/XGBoost</td>
                <td>0.571 (0.286-0.833)</td>
                <td>0.178 (0.068-0.300)</td>
                <td>0.984 (0.971-0.995)</td>
                <td>0.898 (0.868-0.924)</td>
                <td>0.271 (0.118-0.415)</td>
                <td>0.780 (0.706-0.850)</td>
                <td>0.330 (0.154-0.436)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>AUROCC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>AUPRC: area under precision-recall curve.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>BERT: bidirectional encoder representations from transformers.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>XGBoost: extreme gradient boosting.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Area Under the ROC Curves for Pain Interference and Fatigue</title>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> (upper) displays the specific NLP/ML method that had the highest area under the ROC curves for each attribute (detailed results in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref>). The diagonal line represents the random guess (ie, reference). For the pain interference domain (left panel), the BERT model was superior to the Word2vec/SVM and Word2vec/XGBoost models, and the areas under the ROC curve were 0.923 (95% CI 0.879-0.997) for cognitive, 0.875 (95% CI 0.824-0.948) for physical attributes, and 0.857 (95% CI 0.786-0.918) for social attributes. For the fatigue domain (right panel), the BERT model was superior to the Word2vec/SVM and Word2vec/XGBoost models, and areas under the ROC curve were 0.948 (95% CI 0.922-0.979) for cognitive and 0.775 (95% CI 0.723-0.848) for physical attributes. The values of BERT were significantly higher in identifying problems with cognitive attributes in both pain interference and fatigue domains compared with Word2vec/XGBoost (<italic>P</italic>&#60;.05; <xref ref-type="supplementary-material" rid="app13">Multimedia Appendix 13</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Area under the receiver operating characteristic curves and precision-recall curves for the best models of pain interference domain (left column) and fatigue domain (right column) by 3 symptom attributes. BERT: bidirectional encoder representations from transformers; PR: precision recall; ROC: receiver operating characteristic; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="jmir_v23i11e26777_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Area Under the PR Curves for Pain Interference and Fatigue</title>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> (lower) displays the specific NLP/ML method that had the highest area under the PR curves for each attribute (see detailed results in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref>). The horizontal line at the bottom represents a random guess (ie, reference). For the pain interference domain (left panel), the BERT model was superior to the Word2vec/SVM and Word2vec/XGBoost models, and the areas under the PR curve were 0.818 (95% CI 0.735-0.917) for cognitive, 0.677 (95% CI 0.568-0.770) for physical attributes, and 0.566 (95% CI 0.402-0.750) for social attributes. For the fatigue domain (right panel), the BERT models were superior to the Word2vec/SVM and Word2vec/XGBoost models, and areas under the PR curve were 0.855 (95% CI 0.791-0.930) for cognitive, 0.561 (95% CI 0.434-0.741) for social attributes, and 0.537 (95% CI 0.443-0.634) for physical attributes. In addition, the values of BERT were significantly higher in identifying problems with cognitive and social attributes in both pain interference and fatigue domains compared with both Word2vec/SVM and Word2vec/XGBoost (<italic>P</italic>&#60;.05; <xref ref-type="supplementary-material" rid="app13">Multimedia Appendices 13</xref> and <xref ref-type="supplementary-material" rid="app14">14</xref>).</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Very few studies have demonstrated the feasibility of applying NLP/ML methods to extract semantic features from unstructured PROs. This study applied different NLP/ML models to analyze PRO assessment in pediatric cancer survivorship, with a special focus on young survivors of pediatric cancer aged &#60;18 years as a vulnerable population, and used rigorous methods to validate the performance of NLP/ML models. The results suggest that the BERT method outperformed the Word2vec/ML methods across different validation metrics in both the pain interference and fatigue symptom domains. Specifically, the BERT method yielded higher accuracy (&#62;0.8), larger area under the ROC curve (&#62;0.8, except for the social attribute in fatigue domain), and a larger area under the PR curve in identifying problems with all 3 attributes over 2 symptom domains compared with the Word2vec/SVM and Word2vec/XGBoost methods. The models with higher accuracy were characterized by high specificity (&#62;0.9) but low sensitivity (&#60;0.5) for all 3 attributes and 2 symptom domains.</p>
        <p>The findings of high specificity and low sensitivity suggest that our NLP/ML algorithms can be used to identify problematic symptoms (ie, diagnostic confirmation) rather than for symptom screening. However, if the default threshold (ie, 0.5) for ROC curves was changed to a lower value that mimics the proportion of meaning units labeled as the presence of the problematic attribute, both specificity and sensitivity will reach the level of 0.7-0.8. How to use NLP/ML techniques to convert unstructured PROs into semantic features and transform the data into meaningful diagnostic information for clinical decision-making is an emerging topic [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. It is important to extend our NLP/ML pipeline to assess other aspects of symptom problems (eg, severity and interference) for cancer populations and in a longitudinal context, which is valuable for detecting changes in symptom patterns and identifying early signs of adverse events [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>].</p>
      </sec>
      <sec>
        <title>Comparisons of Model Performance</title>
        <p>In both symptom domains, the performance of NLP/ML techniques (accuracy, F1 value, and areas under ROC and PR curves) in identifying problems with cognitive attributes was superior to physical and social attributes. Interestingly, model validity based on data collected from survivors and caregivers was slightly better than that of survivors alone (<xref ref-type="supplementary-material" rid="app15">Multimedia Appendices 15</xref> and <xref ref-type="supplementary-material" rid="app16">16</xref>). This finding is in part because of the inclusion of complementary information from survivors and caregivers and the increase in sample size.</p>
        <p>The superior performance of NLP/ML techniques suggests the usefulness of interview-based methods for collecting unstructured PRO data to complement the survey-based methods that contain a prespecified fixed content of PROs in follow-up care among survivors of cancer. Using our validated NLP/ML algorithms to automatically abstract and label the semantic features of unstructured PROs derived from interviews represents an efficient strategy for collecting PRO data from busy clinics. Our NLP/ML approach can be extended to analyze other forms of unstructured PROs (eg, documented patient-clinician conversations and medical notes in EMRs) when data are available. Other novel technologies (eg, audio-recorded PROs) also deserve investigation in analyzing unstructured PROs. Multimodal sentiment analysis [<xref ref-type="bibr" rid="ref58">58</xref>], which investigates affective states by extracting textual and audio features, can be combined with the semantic features from NLP to obtain a comprehensive understanding of survivors’ PROs. The successful application of NLP/ML for PRO assessment ideally requires the implementation of integrated platforms that interconnect the EHR-based medical note systems, NLP/ML analytics, and supportive tools for result display, clinical interpretation, and treatment recommendation [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref59">59</xref>-<xref ref-type="bibr" rid="ref61">61</xref>]. The integrated platforms will facilitate clinicians in clinical decision-making for caring for survivors of cancer whose complex late medical effects can be predicted by the deterioration of symptoms and clinical parameters.</p>
        <p>The superior performance of BERT to the Word2vec/ML method is because of the flexible design of BERT that accounts for contextual information of PROs. Basically, BERT includes multilayer deep neural networks (illustrated in self-attention layers of the fine-tuning process; <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>) to enable flexible feature extraction at different levels, such as syntactic, semantic, and contextual information. In comparison, Word2vec includes a one-level shallow neural network with limited flexibility. Uniquely, the semantic features derived by BERT capture different meanings of the same word in different contexts, whereas Word2vec generates, for each word, static semantic features that do not vary in different contexts.</p>
      </sec>
      <sec>
        <title>Different NLP Methods for Analyzing Unstructured PRO Data</title>
        <p>The clinical application of NLP/ML in PRO research is still in its infancy. This study used the BERT model pretrained on Wikipedia and BooksCorpus to generate general semantic features as a starting point. The use of BooksCorpus and Wikipedia is appropriate for survivors of pediatric cancer, resulting in satisfactory model performance. This is because BooksCorpus and Wikipedia contain comprehensive generic terms that capture the heterogeneous health conditions experienced by various populations, including survivors of cancer, ranging from healthy (no late effects and no symptoms) to ill (severe late effects with severe symptoms). Alternatively, BERT models can be pretrained using larger free text data to generate comprehensive features of PROs. Similar methods may include SciBERT [<xref ref-type="bibr" rid="ref62">62</xref>], trained on texts in Semantic Scholar; BioBERT [<xref ref-type="bibr" rid="ref49">49</xref>], trained on texts in PubMed; and Clinical BERT [<xref ref-type="bibr" rid="ref51">51</xref>], trained on clinical notes in MIMIC-III [<xref ref-type="bibr" rid="ref63">63</xref>]. In addition, the health knowledge graph [<xref ref-type="bibr" rid="ref64">64</xref>] can be used to integrate different concepts from various data elements in multiomics frameworks (including unstructured PROs in medical notes, structured PROs from patient surveys, imaging, genetics, and treatment profiles), and analyze complex relationships among these data to improve evaluations of survivorship outcomes through a multitask learning framework [<xref ref-type="bibr" rid="ref65">65</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study contains several limitations. First, our samples were limited to survivors of pediatric cancer who were treated at a single institution. However, our samples represent diverse diagnoses, ages, races and ethnicities, and families residing in counties with poverty levels similar to the national average [<xref ref-type="bibr" rid="ref15">15</xref>]. Second, we only analyzed pain interference and fatigue domains and restricted them to 3 key attributes of symptoms. Future studies are encouraged to apply our NLP/ML pipeline to analyze other PRO domains and include more comprehensive attribute classifications. Third, our data were collected cross-sectionally, which merely provides a snapshot of PROs. Future studies are needed to test the validity of abstracting longitudinal unstructured PROs to identify time-dependent patterns. In summary, we demonstrated a robust validity of NLP/ML algorithms in abstracting and analyzing unstructured PROs collected from interviews with survivors of childhood cancer and caregivers. These promising results suggest the utility of NLP/ML methods in future works for monitoring survivors’ PROs and the opportunity of extending our methods to other PRO domains and data collection systems (eg, audio-recorded PROs or medical notes) under a unified platform that integrates EHR-based data collection systems, NLP/ML analytics, and supportive tools for interpretation of results and treatment recommendations. Integration of NLP/ML-based PRO assessment to complement other clinical data will facilitate the improvement of follow-up care for survivors of cancer.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Interview guides for pain interference domain (cancer survivor).</p>
        <media xlink:href="jmir_v23i11e26777_app1.docx" xlink:title="DOCX File , 26 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Interview guides for fatigue domain (cancer survivor).</p>
        <media xlink:href="jmir_v23i11e26777_app2.docx" xlink:title="DOCX File , 26 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Examples of meaning units derived from study participants and corresponding attributes.</p>
        <media xlink:href="jmir_v23i11e26777_app3.docx" xlink:title="DOCX File , 24 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>The natural language processing/machine learning pipeline to analyze unstructured patient-reported outcomes data.</p>
        <media xlink:href="jmir_v23i11e26777_app4.docx" xlink:title="DOCX File , 88 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Concept of bidirectional encoder representations from transformers [base, uncased] techniques.</p>
        <media xlink:href="jmir_v23i11e26777_app5.docx" xlink:title="DOCX File , 489 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Tools or packages and fine-tuned hyper-parameters to analyze natural language processing/machine learning models.</p>
        <media xlink:href="jmir_v23i11e26777_app6.docx" xlink:title="DOCX File , 24 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Concept of Word2Vec techniques.</p>
        <media xlink:href="jmir_v23i11e26777_app7.docx" xlink:title="DOCX File , 426 KB"/>
      </supplementary-material>
      <supplementary-material id="app8">
        <label>Multimedia Appendix 8</label>
        <p>The changes of the area under the precision-recall curve among different natural language processing/machine learning models.</p>
        <media xlink:href="jmir_v23i11e26777_app8.docx" xlink:title="DOCX File , 26 KB"/>
      </supplementary-material>
      <supplementary-material id="app9">
        <label>Multimedia Appendix 9</label>
        <p>Performance of natural language processing/machine learning models for pain interference domain by three symptom attributes (cancer survivors and caregivers).</p>
        <media xlink:href="jmir_v23i11e26777_app9.docx" xlink:title="DOCX File , 30 KB"/>
      </supplementary-material>
      <supplementary-material id="app10">
        <label>Multimedia Appendix 10</label>
        <p>Performance of natural language processing/machine learning models for fatigue domain by three symptom attributes (cancer survivors and caregivers).</p>
        <media xlink:href="jmir_v23i11e26777_app10.docx" xlink:title="DOCX File , 30 KB"/>
      </supplementary-material>
      <supplementary-material id="app11">
        <label>Multimedia Appendix 11</label>
        <p>Five-fold cross-validation methods.</p>
        <media xlink:href="jmir_v23i11e26777_app11.docx" xlink:title="DOCX File , 767 KB"/>
      </supplementary-material>
      <supplementary-material id="app12">
        <label>Multimedia Appendix 12</label>
        <p>Frequency of attributes in pain interference and fatigue domains labeled by content experts.</p>
        <media xlink:href="jmir_v23i11e26777_app12.docx" xlink:title="DOCX File , 24 KB"/>
      </supplementary-material>
      <supplementary-material id="app13">
        <label>Multimedia Appendix 13</label>
        <p>The changes in the area under the receiver operating characteristic curve and precision-recall curve among different natural language processing/machine learning models (survivors and caregivers).</p>
        <media xlink:href="jmir_v23i11e26777_app13.docx" xlink:title="DOCX File , 26 KB"/>
      </supplementary-material>
      <supplementary-material id="app14">
        <label>Multimedia Appendix 14</label>
        <p>Precision-recall curves for pain interference and fatigue domains by three symptom attributes (survivors and caregivers).</p>
        <media xlink:href="jmir_v23i11e26777_app14.docx" xlink:title="DOCX File , 153 KB"/>
      </supplementary-material>
      <supplementary-material id="app15">
        <label>Multimedia Appendix 15</label>
        <p>Performance of natural language processing/machine learning models for pain interference domain by three symptom attributes (survivors only).</p>
        <media xlink:href="jmir_v23i11e26777_app15.docx" xlink:title="DOCX File , 26 KB"/>
      </supplementary-material>
      <supplementary-material id="app16">
        <label>Multimedia Appendix 16</label>
        <p>Performance of natural language processing/machine learning models for fatigue domain by three symptom attributes (survivors only).</p>
        <media xlink:href="jmir_v23i11e26777_app16.docx" xlink:title="DOCX File , 26 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PR</term>
          <def>
            <p>precision-recall</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PRO</term>
          <def>
            <p>patient-reported outcome</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PROMIS</term>
          <def>
            <p>Patient-Reported Outcomes Measurement Information System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">TF-IDF</term>
          <def>
            <p>term frequency–inverse document frequency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">XGBoost</term>
          <def>
            <p>extreme gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank Rachel M Keesey and Ruth J Eliason for conducting in-depth interviews with study participants, and Jennifer L Clegg and Conor M Jones for labeling the attributes of symptoms based on the interview data.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>ZL, JAS, and ICH contributed to the concept and design; CBF provided administrative support; MMH and LLR contributed to the provision of study materials; JNB and ICH contributed to collection and assembly of data; ZL, JAS, JXW, and ICH contributed to data analysis and interpretation; ZL, JAS, and ICH contributed to manuscript writing; and all authors contributed to editing and final approval of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mariotto</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Rowland</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Yabroff</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Alfano</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Jemal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>Cancer treatment and survivorship statistics, 2019</article-title>
          <source>CA Cancer J Clin</source>
          <year>2019</year>
          <month>09</month>
          <volume>69</volume>
          <issue>5</issue>
          <fpage>363</fpage>
          <lpage>85</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3322/caac.21565"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/caac.21565</pub-id>
          <pub-id pub-id-type="medline">31184787</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Jemal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Cancer statistics, 2019</article-title>
          <source>CA Cancer J Clin</source>
          <year>2019</year>
          <month>01</month>
          <volume>69</volume>
          <issue>1</issue>
          <fpage>7</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3322/caac.21551"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/caac.21551</pub-id>
          <pub-id pub-id-type="medline">30620402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Padgett</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Leisenring</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Stratton</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Bishop</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Krull</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Alfano</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>de Moor</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hartigan</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>GT</given-names>
            </name>
            <name name-style="western">
              <surname>Robison</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Rowland</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Oeffinger</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Mariotto</surname>
              <given-names>AB</given-names>
            </name>
          </person-group>
          <article-title>Survivors of childhood cancer in the United States: Prevalence and burden of morbidity</article-title>
          <source>Cancer Epidemiol Biomarkers Prev</source>
          <year>2015</year>
          <month>04</month>
          <volume>24</volume>
          <issue>4</issue>
          <fpage>653</fpage>
          <lpage>63</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://cebp.aacrjournals.org/cgi/pmidlookup?view=long&#38;pmid=25834148"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/1055-9965.EPI-14-1418</pub-id>
          <pub-id pub-id-type="medline">25834148</pub-id>
          <pub-id pub-id-type="pii">24/4/653</pub-id>
          <pub-id pub-id-type="pmcid">PMC4418452</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oeffinger</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Mertens</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Sklar</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Kawashima</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Meadows</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Marina</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hobbie</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kadan-Lottick</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Leisenring</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Robison</surname>
              <given-names>LL</given-names>
            </name>
            <collab>Childhood Cancer Survivor Study</collab>
          </person-group>
          <article-title>Chronic health conditions in adult survivors of childhood cancer</article-title>
          <source>N Engl J Med</source>
          <year>2006</year>
          <month>10</month>
          <day>12</day>
          <volume>355</volume>
          <issue>15</issue>
          <fpage>1572</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMsa060185</pub-id>
          <pub-id pub-id-type="medline">17035650</pub-id>
          <pub-id pub-id-type="pii">355/15/1572</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>Melissa M</given-names>
            </name>
            <name name-style="western">
              <surname>Ness</surname>
              <given-names>Kirsten K</given-names>
            </name>
            <name name-style="western">
              <surname>Gurney</surname>
              <given-names>James G</given-names>
            </name>
            <name name-style="western">
              <surname>Mulrooney</surname>
              <given-names>Daniel A</given-names>
            </name>
            <name name-style="western">
              <surname>Chemaitilly</surname>
              <given-names>Wassim</given-names>
            </name>
            <name name-style="western">
              <surname>Krull</surname>
              <given-names>Kevin R</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>Daniel M</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>Gregory T</given-names>
            </name>
            <name name-style="western">
              <surname>Nottage</surname>
              <given-names>Kerri A</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>Kendra E</given-names>
            </name>
            <name name-style="western">
              <surname>Sklar</surname>
              <given-names>Charles A</given-names>
            </name>
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>Deo Kumar</given-names>
            </name>
            <name name-style="western">
              <surname>Robison</surname>
              <given-names>Leslie L</given-names>
            </name>
          </person-group>
          <article-title>Clinical ascertainment of health outcomes among adults treated for childhood cancer</article-title>
          <source>JAMA</source>
          <year>2013</year>
          <month>06</month>
          <day>12</day>
          <volume>309</volume>
          <issue>22</issue>
          <fpage>2371</fpage>
          <lpage>2381</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23757085"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2013.6296</pub-id>
          <pub-id pub-id-type="medline">23757085</pub-id>
          <pub-id pub-id-type="pmcid">PMC3771083</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>GT</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yasui</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Leisenring</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Mertens</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Stovall</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Oeffinger</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Krull</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Nathan</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Neglia</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Robison</surname>
              <given-names>LL</given-names>
            </name>
          </person-group>
          <article-title>Reduction in late mortality among 5-year survivors of childhood cancer</article-title>
          <source>N Engl J Med</source>
          <year>2016</year>
          <month>03</month>
          <day>03</day>
          <volume>374</volume>
          <issue>9</issue>
          <fpage>833</fpage>
          <lpage>42</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26761625"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1510795</pub-id>
          <pub-id pub-id-type="medline">26761625</pub-id>
          <pub-id pub-id-type="pmcid">PMC4786452</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Brinkman</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Kenzik</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gurney</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Ness</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Lanctot</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shenkman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Robison</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Krull</surname>
              <given-names>KR</given-names>
            </name>
          </person-group>
          <article-title>Association between the prevalence of symptoms and health-related quality of life in adult survivors of childhood cancer: a report from the St Jude Lifetime Cohort Study</article-title>
          <source>J Clin Oncol</source>
          <year>2013</year>
          <month>11</month>
          <day>20</day>
          <volume>31</volume>
          <issue>33</issue>
          <fpage>4242</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24127449"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/JCO.2012.47.8867</pub-id>
          <pub-id pub-id-type="medline">24127449</pub-id>
          <pub-id pub-id-type="pii">JCO.2012.47.8867</pub-id>
          <pub-id pub-id-type="pmcid">PMC3821013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeltzer</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>Recklitis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Buchbinder</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zebrack</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Casillas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tsao</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Krull</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Psychological status in childhood cancer survivors: a report from the Childhood Cancer Survivor Study</article-title>
          <source>J Clin Oncol</source>
          <year>2009</year>
          <month>05</month>
          <day>10</day>
          <volume>27</volume>
          <issue>14</issue>
          <fpage>2396</fpage>
          <lpage>404</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19255309"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/JCO.2008.21.1433</pub-id>
          <pub-id pub-id-type="medline">19255309</pub-id>
          <pub-id pub-id-type="pii">JCO.2008.21.1433</pub-id>
          <pub-id pub-id-type="pmcid">PMC2677925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robison</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Survivors of childhood and adolescent cancer: Life-long risks and responsibilities</article-title>
          <source>Nat Rev Cancer</source>
          <year>2014</year>
          <month>01</month>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>61</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24304873"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nrc3634</pub-id>
          <pub-id pub-id-type="medline">24304873</pub-id>
          <pub-id pub-id-type="pii">nrc3634</pub-id>
          <pub-id pub-id-type="pmcid">PMC6425479</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spathis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Booth</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Grove</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hatcher</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Barclay</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Teenage and young adult cancer-related fatigue is prevalent, distressing, and neglected: It is time to intervene. A systematic literature review and narrative synthesis</article-title>
          <source>J Adolesc Young Adult Oncol</source>
          <year>2015</year>
          <month>03</month>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>3</fpage>
          <lpage>17</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25852970"/>
          </comment>
          <pub-id pub-id-type="doi">10.1089/jayao.2014.0023</pub-id>
          <pub-id pub-id-type="medline">25852970</pub-id>
          <pub-id pub-id-type="pii">10.1089/jayao.2014.0023</pub-id>
          <pub-id pub-id-type="pmcid">PMC4365509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosenberg</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Orellana</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ullrich</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Geyer</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Feudtner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dussel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Wolfe</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Quality of life in children with advanced cancer: a report from the PediQUEST study</article-title>
          <source>J Pain Symptom Manage</source>
          <year>2016</year>
          <month>08</month>
          <volume>52</volume>
          <issue>2</issue>
          <fpage>243</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0885-3924(16)30095-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jpainsymman.2016.04.002</pub-id>
          <pub-id pub-id-type="medline">27220948</pub-id>
          <pub-id pub-id-type="pii">S0885-3924(16)30095-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4996729</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolfe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Orellana</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ullrich</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>EF</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>TI</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenberg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Geyer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Feudtner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dussel</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Symptoms and distress in children with advanced cancer: Prospective patient-reported outcomes from the PediQUEST study</article-title>
          <source>J Clin Oncol</source>
          <year>2015</year>
          <month>06</month>
          <day>10</day>
          <volume>33</volume>
          <issue>17</issue>
          <fpage>1928</fpage>
          <lpage>35</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25918277"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/JCO.2014.59.1222</pub-id>
          <pub-id pub-id-type="medline">25918277</pub-id>
          <pub-id pub-id-type="pii">JCO.2014.59.1222</pub-id>
          <pub-id pub-id-type="pmcid">PMC4451175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Deuren</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Boonstra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van Dulmen-den Broeder</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Blijlevens</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Knoop</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Loonen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Severe fatigue after treatment for childhood cancer</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2020</year>
          <month>03</month>
          <day>03</day>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>CD012681</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32124971"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD012681.pub2</pub-id>
          <pub-id pub-id-type="medline">32124971</pub-id>
          <pub-id pub-id-type="pmcid">PMC7059965</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alberts</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Gagnon</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Stinson</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>Chronic pain in survivors of childhood cancer: a developmental model of pain across the cancer trajectory</article-title>
          <source>Pain</source>
          <year>2018</year>
          <month>10</month>
          <volume>159</volume>
          <issue>10</issue>
          <fpage>1916</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1097/j.pain.0000000000001261</pub-id>
          <pub-id pub-id-type="medline">29708940</pub-id>
          <pub-id pub-id-type="pii">00006396-201810000-00004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pierzynski</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Clegg</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Sim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Forrest</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Robison</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Baker</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Patient-reported outcomes in paediatric cancer survivorship: a qualitative study to elicit the content from cancer survivors and caregivers</article-title>
          <source>BMJ Open</source>
          <year>2020</year>
          <month>05</month>
          <day>17</day>
          <volume>10</volume>
          <issue>5</issue>
          <fpage>e032414</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=32423926"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2019-032414</pub-id>
          <pub-id pub-id-type="medline">32423926</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2019-032414</pub-id>
          <pub-id pub-id-type="pmcid">PMC7239535</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipscomb</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gotay</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Snyder</surname>
              <given-names>CF</given-names>
            </name>
          </person-group>
          <article-title>Patient-reported outcomes in cancer: a review of recent research and policy initiatives</article-title>
          <source>CA Cancer J Clin</source>
          <year>2007</year>
          <volume>57</volume>
          <issue>5</issue>
          <fpage>278</fpage>
          <lpage>300</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.3322/CA.57.5.278"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/CA.57.5.278</pub-id>
          <pub-id pub-id-type="medline">17855485</pub-id>
          <pub-id pub-id-type="pii">57/5/278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Warsame</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>D'Souza</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Patient reported outcomes have arrived: a practical overview for clinicians in using patient reported outcomes in oncology</article-title>
          <source>Mayo Clin Proc</source>
          <year>2019</year>
          <month>11</month>
          <volume>94</volume>
          <issue>11</issue>
          <fpage>2291</fpage>
          <lpage>301</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31563425"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.mayocp.2019.04.005</pub-id>
          <pub-id pub-id-type="medline">31563425</pub-id>
          <pub-id pub-id-type="pii">S0025-6196(19)30355-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6832764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Capturing the patient's perspective: a review of advances in natural language processing of health-related text</article-title>
          <source>Yearb Med Inform</source>
          <year>2017</year>
          <month>08</month>
          <volume>26</volume>
          <issue>1</issue>
          <fpage>214</fpage>
          <lpage>27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.15265/IY-2017-029"/>
          </comment>
          <pub-id pub-id-type="doi">10.15265/IY-2017-029</pub-id>
          <pub-id pub-id-type="medline">29063568</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheikhalishahi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rinaldi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>04</month>
          <day>27</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e12239/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12239</pub-id>
          <pub-id pub-id-type="medline">31066697</pub-id>
          <pub-id pub-id-type="pii">v7i2e12239</pub-id>
          <pub-id pub-id-type="pmcid">PMC6528438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forsyth</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Barzilay</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Lui</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenz</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Enzinger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tulsky</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Lindvall</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Machine learning methods to extract documentation of breast cancer symptoms from electronic health records</article-title>
          <source>J Pain Symptom Manage</source>
          <year>2018</year>
          <month>06</month>
          <volume>55</volume>
          <issue>6</issue>
          <fpage>1492</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0885-3924(18)30082-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jpainsymman.2018.02.016</pub-id>
          <pub-id pub-id-type="medline">29496537</pub-id>
          <pub-id pub-id-type="pii">S0885-3924(18)30082-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hyun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Bakken</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Exploring the ability of natural language processing to extract data from nursing narratives</article-title>
          <source>Comput Inform Nurs</source>
          <year>2009</year>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>215</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19574746"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/NCN.0b013e3181a91b58</pub-id>
          <pub-id pub-id-type="medline">19574746</pub-id>
          <pub-id pub-id-type="pii">00024665-200907000-00005</pub-id>
          <pub-id pub-id-type="pmcid">PMC4415266</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koleck</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Dreisbach</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bourne</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Bakken</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of symptoms documented in free-text narratives of electronic health records: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>04</month>
          <day>01</day>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>364</fpage>
          <lpage>79</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30726935"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy173</pub-id>
          <pub-id pub-id-type="medline">30726935</pub-id>
          <pub-id pub-id-type="pii">5307912</pub-id>
          <pub-id pub-id-type="pmcid">PMC6657282</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chandran</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shetty</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sanyal</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Downs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fok</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Vermeulen</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Schirmbeck</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>de Haan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hayes</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Use of natural language processing to identify obsessive compulsive symptoms in patients with schizophrenia, schizoaffective disorder or bipolar disorder</article-title>
          <source>Sci Rep</source>
          <year>2019</year>
          <month>10</month>
          <day>02</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>14146</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-019-49165-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-019-49165-2</pub-id>
          <pub-id pub-id-type="medline">31578348</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-019-49165-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6775052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cristianini</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shawe-Taylor</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>An Introduction to Support Vector Machines and Other Kernel-Based Learning Methods</source>
          <year>2000</year>
          <publisher-loc>Cambridge, England, UK</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: A scalable tree boosting system</article-title>
          <source>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2016</year>
          <month>08</month>
          <conf-name>KDD '16: The 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 13 - 17, 2016</conf-date>
          <conf-loc>San Francisco, California, USA</conf-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>785</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Horwitz</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Toh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using machine learning to identify health outcomes from electronic health record data</article-title>
          <source>Curr Epidemiol Rep</source>
          <year>2018</year>
          <month>12</month>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>331</fpage>
          <lpage>42</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30555773"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40471-018-0165-9</pub-id>
          <pub-id pub-id-type="medline">30555773</pub-id>
          <pub-id pub-id-type="pmcid">PMC6289196</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Buckley</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Term-weighting approaches in automatic text retrieval</article-title>
          <source>Inf Process Manag</source>
          <year>1988</year>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>513</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1016/0306-4573(88)90021-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>GloVe: Global vectors for word representation</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <year>2014</year>
          <conf-name>2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>October 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>1532</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>NIPS'13: Proceedings of the 26th International Conference on Neural Information Processing Systems</source>
          <year>2013</year>
          <conf-name>26th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 5-10, 2013</conf-date>
          <conf-loc>Lake Tahoe, Nevada</conf-loc>
          <fpage>3111</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/2999792.2999959"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2019</year>
          <conf-name>Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 2019</conf-date>
          <conf-loc>Minneapolis, Minnesota</conf-loc>
          <fpage>4171</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/n19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forrest</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Forrest</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Clegg</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>de la Motte</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Amaral</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Grossman</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Furth</surname>
              <given-names>SL</given-names>
            </name>
          </person-group>
          <article-title>Establishing the content validity of PROMIS pediatric pain interference, fatigue, sleep disturbance, and sleep-related impairment measures in children with chronic kidney disease and Crohn's disease</article-title>
          <source>J Patient Rep Outcomes</source>
          <year>2020</year>
          <month>02</month>
          <day>12</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32052205"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s41687-020-0178-2</pub-id>
          <pub-id pub-id-type="medline">32052205</pub-id>
          <pub-id pub-id-type="pii">10.1186/s41687-020-0178-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7016154</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <article-title>PROMIS measures for pediatric self-report (ages 8-17) and parent proxy report (ages 5-17)</article-title>
          <source>Health Measures</source>
          <access-date>2021-09-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthmeasures.net/explore-measurement-systems/promis/intro-to-promis/list-of-pediatric-measures">https://www.healthmeasures.net/explore-measurement-systems/promis/intro-to-promis/list-of-pediatric-measures</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Varni</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Stucky</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Thissen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dewitt</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Irwin</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yeatts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dewalt</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>PROMIS pediatric pain interference scale: An item response theory analysis of the pediatric pain item bank</article-title>
          <source>J Pain</source>
          <year>2010</year>
          <month>11</month>
          <volume>11</volume>
          <issue>11</issue>
          <fpage>1109</fpage>
          <lpage>19</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20627819"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jpain.2010.02.005</pub-id>
          <pub-id pub-id-type="medline">20627819</pub-id>
          <pub-id pub-id-type="pii">S1526-5900(10)00326-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC3129595</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stucky</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Thissen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Varni</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>DeWitt</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Irwin</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Yeatts</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>DeWalt</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Development and psychometric properties of the PROMIS® pediatric fatigue item banks</article-title>
          <source>Qual Life Res</source>
          <year>2013</year>
          <month>11</month>
          <volume>22</volume>
          <issue>9</issue>
          <fpage>2417</fpage>
          <lpage>27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23378106"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11136-013-0357-1</pub-id>
          <pub-id pub-id-type="medline">23378106</pub-id>
          <pub-id pub-id-type="pmcid">PMC3695011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bevans</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Pajer</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Becker</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Carle</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Forrest</surname>
              <given-names>CB</given-names>
            </name>
          </person-group>
          <article-title>Psychometric evaluation of the PROMIS® pediatric psychological and physical stress experiences measures</article-title>
          <source>J Pediatr Psychol</source>
          <year>2018</year>
          <month>07</month>
          <day>01</day>
          <volume>43</volume>
          <issue>6</issue>
          <fpage>678</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29490050"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jpepsy/jsy010</pub-id>
          <pub-id pub-id-type="medline">29490050</pub-id>
          <pub-id pub-id-type="pii">4912417</pub-id>
          <pub-id pub-id-type="pmcid">PMC6005079</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nowinski</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zelko</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wortman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Burns</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nordli</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Cella</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Validation of the Neuro-QoL measurement system in children with epilepsy</article-title>
          <source>Epilepsy Behav</source>
          <year>2015</year>
          <month>05</month>
          <volume>46</volume>
          <fpage>209</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25862469"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.yebeh.2015.02.038</pub-id>
          <pub-id pub-id-type="medline">25862469</pub-id>
          <pub-id pub-id-type="pii">S1525-5050(15)00098-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC4458416</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ravens-Sieberer</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Devine</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bevans</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salsman</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Forrest</surname>
              <given-names>CB</given-names>
            </name>
          </person-group>
          <article-title>Subjective well-being measures for children were developed within the PROMIS project: Presentation of first results</article-title>
          <source>J Clin Epidemiol</source>
          <year>2014</year>
          <month>02</month>
          <volume>67</volume>
          <issue>2</issue>
          <fpage>207</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24295987"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2013.08.018</pub-id>
          <pub-id pub-id-type="medline">24295987</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(13)00380-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4120943</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ibragimova</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pless</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Adolfsson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Granlund</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Björck-Åkesson</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Using content analysis to link texts on assessment and intervention to the international classification of functioning, disability and health - version for children and youth (ICF-CY)</article-title>
          <source>J Rehabil Med</source>
          <year>2011</year>
          <month>07</month>
          <volume>43</volume>
          <issue>8</issue>
          <fpage>728</fpage>
          <lpage>33</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medicaljournals.se/jrm/content/abstract/10.2340/16501977-0831"/>
          </comment>
          <pub-id pub-id-type="doi">10.2340/16501977-0831</pub-id>
          <pub-id pub-id-type="medline">21732007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hwang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liou</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bedell</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Granlund</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Teng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of the ICF-CY-based functioning scale of the disability evaluation system—child version in Taiwan</article-title>
          <source>J Formos Med Assoc</source>
          <year>2015</year>
          <month>12</month>
          <volume>114</volume>
          <issue>12</issue>
          <fpage>1170</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0929-6646(15)00354-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jfma.2015.11.002</pub-id>
          <pub-id pub-id-type="medline">26705138</pub-id>
          <pub-id pub-id-type="pii">S0929-6646(15)00354-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Güeita-Rodríguez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Florencio</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Arias-Buría</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Lambeck</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fernández-de-Las-Peñas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Palacios-Ceña</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Content comparison of aquatic therapy outcome measures for children with neuromuscular and neurodevelopmental disorders using the international classification of functioning, disability, and health</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2019</year>
          <month>11</month>
          <day>02</day>
          <volume>16</volume>
          <issue>21</issue>
          <fpage>4263</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph16214263"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph16214263</pub-id>
          <pub-id pub-id-type="medline">31684043</pub-id>
          <pub-id pub-id-type="pii">ijerph16214263</pub-id>
          <pub-id pub-id-type="pmcid">PMC6862466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Cella</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chassany</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Fairclough</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>GY</given-names>
            </name>
            <name name-style="western">
              <surname>Hays</surname>
              <given-names>RD</given-names>
            </name>
            <collab>Clinical Significance Consensus Meeting Group</collab>
          </person-group>
          <article-title>Precision of health-related quality-of-life data compared with other clinical measures</article-title>
          <source>Mayo Clin Proc</source>
          <year>2007</year>
          <month>10</month>
          <volume>82</volume>
          <issue>10</issue>
          <fpage>1244</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.4065/82.10.1244</pub-id>
          <pub-id pub-id-type="medline">17908530</pub-id>
          <pub-id pub-id-type="pii">S0025-6196(11)61397-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>Advances in Neural Information Processing Systems</source>
          <year>2017</year>
          <access-date>2021-09-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf">https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kiros</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zemel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Urtasun</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Torralba</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Aligning books and movies: Towards story-like visual explanations by watching movies and reading books</article-title>
          <source>Proceedings of the IEEE International Conference on Computer Vision (ICCV)</source>
          <year>2015</year>
          <month>12</month>
          <conf-name>2015 IEEE International Conference on Computer Vision (ICCV)</conf-name>
          <conf-date>Dec. 7-13, 2015</conf-date>
          <conf-loc>Santiago, Chile</conf-loc>
          <pub-id pub-id-type="doi">10.1109/iccv.2015.11</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Deep Learning</source>
          <year>2016</year>
          <publisher-loc>Cambridge, Massachusetts, USA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>ZS</given-names>
            </name>
          </person-group>
          <article-title>Distributional Structure</article-title>
          <source>Word</source>
          <year>1954</year>
          <volume>10</volume>
          <issue>2-3</issue>
          <fpage>146</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1080/00437956.1954.11659520</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sadeghi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>McClelland</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>You shall know an object by the company it keeps: An investigation of semantic representations derived from object co-occurrence in visual scenes</article-title>
          <source>Neuropsychologia</source>
          <year>2015</year>
          <month>09</month>
          <volume>76</volume>
          <fpage>52</fpage>
          <lpage>61</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0028-3932(14)00294-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.neuropsychologia.2014.08.031</pub-id>
          <pub-id pub-id-type="medline">25196838</pub-id>
          <pub-id pub-id-type="pii">S0028-3932(14)00294-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4589736</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bojanowski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Enriching word vectors with subword information</article-title>
          <source>Trans Assoc Comput Linguist</source>
          <year>2017</year>
          <month>12</month>
          <volume>5</volume>
          <fpage>135</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00051</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bojanowski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Learning word vectors for 157 languages</article-title>
          <source>Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)</source>
          <year>2018</year>
          <conf-name>Eleventh International Conference on Language Resources and Evaluation (LREC 2018)</conf-name>
          <conf-date>May 2018</conf-date>
          <conf-loc>Miyazaki, Japan</conf-loc>
          <publisher-name>European Language Resources Association (ELRA)</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/L18-1550"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Transfer learning in biomedical natural language processing: An evaluation of BERT and ELMo on ten benchmarking datasets</article-title>
          <source>Proceedings of the 18th BioNLP Workshop and Shared Task</source>
          <year>2019</year>
          <conf-name>18th BioNLP Workshop and Shared Task</conf-name>
          <conf-date>August 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <pub-id pub-id-type="doi">10.18653/v1/w19-5006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Boag</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jindi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Publicly available clinical BERT embeddings</article-title>
          <source>Proceedings of the 2nd Clinical Natural Language Processing Workshop</source>
          <year>2019</year>
          <conf-name>2nd Clinical Natural Language Processing Workshop</conf-name>
          <conf-date>June 2019</conf-date>
          <conf-loc>Minneapolis, Minnesota, USA</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <pub-id pub-id-type="doi">10.18653/v1/w19-1909</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bishop</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <source>Pattern Recognition and Machine Learning</source>
          <year>2006</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saito</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rehmsmeier</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <month>03</month>
          <day>04</day>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>e0118432</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0118432"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0118432</pub-id>
          <pub-id pub-id-type="medline">25738806</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-26790</pub-id>
          <pub-id pub-id-type="pmcid">PMC4349800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <article-title>Zhaohualu - nlp4pro1</article-title>
          <source>GitHub</source>
          <access-date>2021-09-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/zhaohualu/nlp4pro1">https://github.com/zhaohualu/nlp4pro1</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yim</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yetisgen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>WP</given-names>
            </name>
            <name name-style="western">
              <surname>Kwan</surname>
              <given-names>SW</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing in oncology: a review</article-title>
          <source>JAMA Oncol</source>
          <year>2016</year>
          <month>06</month>
          <day>01</day>
          <volume>2</volume>
          <issue>6</issue>
          <fpage>797</fpage>
          <lpage>804</lpage>
          <pub-id pub-id-type="doi">10.1001/jamaoncol.2016.0213</pub-id>
          <pub-id pub-id-type="medline">27124593</pub-id>
          <pub-id pub-id-type="pii">2517402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heintzelman</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Simonsen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lustig</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Anderko</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Haythornthwaite</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Childs</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Bova</surname>
              <given-names>GS</given-names>
            </name>
          </person-group>
          <article-title>Longitudinal analysis of pain in patients with metastatic prostate cancer using natural language processing of medical record text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>898</fpage>
          <lpage>905</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23144336"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2012-001076</pub-id>
          <pub-id pub-id-type="medline">23144336</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2012-001076</pub-id>
          <pub-id pub-id-type="pmcid">PMC3756253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Soguero-Ruiz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Oyvind</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Lindsetmo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kouskoumvekaki</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Girolami</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Olav</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Augestad</surname>
              <given-names>KM</given-names>
            </name>
          </person-group>
          <article-title>Analysis of free text in electronic health records for identification of cancer patient trajectories</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <month>04</month>
          <day>07</day>
          <volume>7</volume>
          <fpage>46226</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/srep46226"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep46226</pub-id>
          <pub-id pub-id-type="medline">28387314</pub-id>
          <pub-id pub-id-type="pii">srep46226</pub-id>
          <pub-id pub-id-type="pmcid">PMC5384191</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soleymani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jou</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pantic</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A survey of multimodal sentiment analysis</article-title>
          <source>Image Vis Comput</source>
          <year>2017</year>
          <month>09</month>
          <volume>65</volume>
          <fpage>3</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1016/j.imavis.2017.08.003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rolfes</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Seabright</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Voge</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bachman</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Kita</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Croghan</surname>
              <given-names>IT</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Juhn</surname>
              <given-names>YJ</given-names>
            </name>
          </person-group>
          <article-title>Application of a natural language processing algorithm to asthma ascertainment: an automated chart review</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2017</year>
          <month>08</month>
          <day>15</day>
          <volume>196</volume>
          <issue>4</issue>
          <fpage>430</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28375665"/>
          </comment>
          <pub-id pub-id-type="doi">10.1164/rccm.201610-2006OC</pub-id>
          <pub-id pub-id-type="medline">28375665</pub-id>
          <pub-id pub-id-type="pmcid">PMC5564673</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Developing a scalable FHIR-based clinical data normalization pipeline for standardizing and integrating unstructured and structured electronic health record data</article-title>
          <source>JAMIA Open</source>
          <year>2019</year>
          <month>10</month>
          <day>18</day>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>570</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32025655"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooz056</pub-id>
          <pub-id pub-id-type="medline">32025655</pub-id>
          <pub-id pub-id-type="pii">ooz056</pub-id>
          <pub-id pub-id-type="pmcid">PMC6993992</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bozkurt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Coquet</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Banerjee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Phenotyping severity of patient-centered outcomes using clinical notes: A prostate cancer use case</article-title>
          <source>Learn Health Syst</source>
          <year>2020</year>
          <month>07</month>
          <day>17</day>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e10237</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/lrh2.10237"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/lrh2.10237</pub-id>
          <pub-id pub-id-type="medline">33083539</pub-id>
          <pub-id pub-id-type="pii">LRH210237</pub-id>
          <pub-id pub-id-type="pmcid">PMC7556418</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beltagy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>SciBERT: A pretrained language model for scientific text</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <conf-name>2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name>
          <conf-date>November 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <month>05</month>
          <day>04</day>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27219127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rotmensch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Halpern</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tlimat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Horng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Learning a health knowledge graph from electronic medical records</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <month>07</month>
          <day>20</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>5994</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.doi.org/10.1038/s41598-017-05778-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-017-05778-z</pub-id>
          <pub-id pub-id-type="medline">28729710</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-017-05778-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC5519723</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>An overview of multi-task learning</article-title>
          <source>Natl Sci Rev</source>
          <year>2017</year>
          <month>09</month>
          <day>01</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>30</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.1093/nsr/nwx105</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
